diff --git a/conda/dev-environment-unix.yml b/conda/dev-environment-unix.yml
index 80b4af66..266f0526 100644
--- a/conda/dev-environment-unix.yml
+++ b/conda/dev-environment-unix.yml
@@ -18,7 +18,7 @@ dependencies:
   - gtest
   - httpx>=0.20,<1
   - isort>=5,<6
-  - libarrow=15
+  - libarrow=16
  - librdkafka
   - libboost-headers
   - lz4-c
@@ -28,7 +28,7 @@ dependencies:
   - numpy
   - pillow
   - psutil
-  - pyarrow=15
+  - pyarrow=16
   - pandas
   - pillow
   - polars
diff --git a/cpp/csp/python/adapters/CMakeLists.txt b/cpp/csp/python/adapters/CMakeLists.txt
index a44cd8d3..512182c4 100644
--- a/cpp/csp/python/adapters/CMakeLists.txt
+++ b/cpp/csp/python/adapters/CMakeLists.txt
@@ -6,7 +6,7 @@ if(CSP_BUILD_KAFKA_ADAPTER)
 endif()
 
 if(CSP_BUILD_PARQUET_ADAPTER)
-    set(VENDORED_PYARROW_ROOT "${CMAKE_SOURCE_DIR}/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/")
+    set(VENDORED_PYARROW_ROOT "${CMAKE_SOURCE_DIR}/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/")
     set(ARROW_PYTHON_SRCS
         ${VENDORED_PYARROW_ROOT}/arrow/python/arrow_to_pandas.cc
         ${VENDORED_PYARROW_ROOT}/arrow/python/benchmark.cc
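Note on the bump above: the vendored sources under vendored/pyarrow-16.0.0/ have to match the libarrow the build links against. A hypothetical compile-time guard (not part of this PR; ARROW_VERSION_MAJOR is provided by arrow/util/config.h) could catch a mismatch early:

#include "arrow/util/config.h"
static_assert(ARROW_VERSION_MAJOR == 16,
              "vendored pyarrow-16.0.0 sources require libarrow 16");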
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/ipc.cc b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/ipc.cc
deleted file mode 100644
index 93481822..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/ipc.cc
+++ /dev/null
@@ -1,67 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "ipc.h"
-
-#include <memory>
-
-#include "arrow/python/pyarrow.h"
-
-namespace arrow {
-namespace py {
-
-PyRecordBatchReader::PyRecordBatchReader() {}
-
-Status PyRecordBatchReader::Init(std::shared_ptr<Schema> schema, PyObject* iterable) {
-  schema_ = std::move(schema);
-
-  iterator_.reset(PyObject_GetIter(iterable));
-  return CheckPyError();
-}
-
-std::shared_ptr<Schema> PyRecordBatchReader::schema() const { return schema_; }
-
-Status PyRecordBatchReader::ReadNext(std::shared_ptr<RecordBatch>* batch) {
-  PyAcquireGIL lock;
-
-  if (!iterator_) {
-    // End of stream
-    batch->reset();
-    return Status::OK();
-  }
-
-  OwnedRef py_batch(PyIter_Next(iterator_.obj()));
-  if (!py_batch) {
-    RETURN_IF_PYERROR();
-    // End of stream
-    batch->reset();
-    iterator_.reset();
-    return Status::OK();
-  }
-
-  return unwrap_batch(py_batch.obj()).Value(batch);
-}
-
-Result<std::shared_ptr<RecordBatchReader>> PyRecordBatchReader::Make(
-    std::shared_ptr<Schema> schema, PyObject* iterable) {
-  auto reader = std::shared_ptr<PyRecordBatchReader>(new PyRecordBatchReader());
-  RETURN_NOT_OK(reader->Init(std::move(schema), iterable));
-  return reader;
-}
-
-}  // namespace py
-}  // namespace arrow
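For context on the wrapper deleted above: PyRecordBatchReader adapts a Python iterable of record batches to an arrow::RecordBatchReader. A rough usage sketch, assuming an existing `schema` and a borrowed `py_iterable` PyObject* in the caller:

// Wrap a Python iterable as a RecordBatchReader and pull one batch.
ARROW_ASSIGN_OR_RAISE(auto reader,
                      arrow::py::PyRecordBatchReader::Make(schema, py_iterable));
std::shared_ptr<arrow::RecordBatch> batch;
ARROW_RETURN_NOT_OK(reader->ReadNext(&batch));
// A null batch signals end of stream.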
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/int_util.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/int_util.h
deleted file mode 100644
index 59a2ac71..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/int_util.h
+++ /dev/null
@@ -1,137 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <type_traits>
-
-#include "arrow/status.h"
-
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class DataType;
-struct ArraySpan;
-struct Scalar;
-
-namespace internal {
-
-ARROW_EXPORT
-uint8_t DetectUIntWidth(const uint64_t* values, int64_t length, uint8_t min_width = 1);
-
-ARROW_EXPORT
-uint8_t DetectUIntWidth(const uint64_t* values, const uint8_t* valid_bytes,
-                        int64_t length, uint8_t min_width = 1);
-
-ARROW_EXPORT
-uint8_t DetectIntWidth(const int64_t* values, int64_t length, uint8_t min_width = 1);
-
-ARROW_EXPORT
-uint8_t DetectIntWidth(const int64_t* values, const uint8_t* valid_bytes, int64_t length,
-                       uint8_t min_width = 1);
-
-ARROW_EXPORT
-void DowncastInts(const int64_t* source, int8_t* dest, int64_t length);
-
-ARROW_EXPORT
-void DowncastInts(const int64_t* source, int16_t* dest, int64_t length);
-
-ARROW_EXPORT
-void DowncastInts(const int64_t* source, int32_t* dest, int64_t length);
-
-ARROW_EXPORT
-void DowncastInts(const int64_t* source, int64_t* dest, int64_t length);
-
-ARROW_EXPORT
-void DowncastUInts(const uint64_t* source, uint8_t* dest, int64_t length);
-
-ARROW_EXPORT
-void DowncastUInts(const uint64_t* source, uint16_t* dest, int64_t length);
-
-ARROW_EXPORT
-void DowncastUInts(const uint64_t* source, uint32_t* dest, int64_t length);
-
-ARROW_EXPORT
-void DowncastUInts(const uint64_t* source, uint64_t* dest, int64_t length);
-
-ARROW_EXPORT
-void UpcastInts(const int32_t* source, int64_t* dest, int64_t length);
-
-template <typename InputInt, typename OutputInt>
-inline typename std::enable_if<(sizeof(InputInt) >= sizeof(OutputInt))>::type CastInts(
-    const InputInt* source, OutputInt* dest, int64_t length) {
-  DowncastInts(source, dest, length);
-}
-
-template <typename InputInt, typename OutputInt>
-inline typename std::enable_if<(sizeof(InputInt) < sizeof(OutputInt))>::type CastInts(
-    const InputInt* source, OutputInt* dest, int64_t length) {
-  UpcastInts(source, dest, length);
-}
-
-template <typename InputInt, typename OutputInt>
-ARROW_EXPORT void TransposeInts(const InputInt* source, OutputInt* dest, int64_t length,
-                                const int32_t* transpose_map);
-
-ARROW_EXPORT
-Status TransposeInts(const DataType& src_type, const DataType& dest_type,
-                     const uint8_t* src, uint8_t* dest, int64_t src_offset,
-                     int64_t dest_offset, int64_t length, const int32_t* transpose_map);
-
-/// \brief Do vectorized boundschecking of integer-type array indices. The
-/// indices must be nonnegative and strictly less than the passed upper
-/// limit (which is usually the length of an array that is being indexed-into).
-ARROW_EXPORT
-Status CheckIndexBounds(const ArraySpan& values, uint64_t upper_limit);
-
-/// \brief Boundscheck integer values to determine if they are all between the
-/// passed upper and lower limits (inclusive). Upper and lower bounds must be
-/// the same type as the data and are not currently casted.
-ARROW_EXPORT
-Status CheckIntegersInRange(const ArraySpan& values, const Scalar& bound_lower,
-                            const Scalar& bound_upper);
-
-/// \brief Use CheckIntegersInRange to determine whether the passed integers
-/// can fit safely in the passed integer type. This helps quickly determine if
-/// integer narrowing (e.g. int64->int32) is safe to do.
-ARROW_EXPORT
-Status IntegersCanFit(const ArraySpan& values, const DataType& target_type);
-
-/// \brief Convenience for boundschecking a single Scalar value
-ARROW_EXPORT
-Status IntegersCanFit(const Scalar& value, const DataType& target_type);
-
-/// Upcast an integer to the largest possible width (currently 64 bits)
-
-template <typename Integer>
-typename std::enable_if<
-    std::is_integral<Integer>::value && std::is_signed<Integer>::value, int64_t>::type
-UpcastInt(Integer v) {
-  return v;
-}
-
-template <typename Integer>
-typename std::enable_if<
-    std::is_integral<Integer>::value && std::is_unsigned<Integer>::value, uint64_t>::type
-UpcastInt(Integer v) {
-  return v;
-}
-
-}  // namespace internal
-}  // namespace arrow
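A usage sketch for the helpers declared above (my example, not from the PR): detect the narrowest width that holds a buffer of int64 values, then downcast when 32 bits suffice. `values64` is an assumed std::vector<int64_t>.

uint8_t width = arrow::internal::DetectIntWidth(values64.data(),
                                                static_cast<int64_t>(values64.size()));
if (width <= 4) {
  std::vector<int32_t> values32(values64.size());
  // CastInts dispatches to DowncastInts here, since sizeof(int64_t) >= sizeof(int32_t).
  arrow::internal::CastInts(values64.data(), values32.data(),
                            static_cast<int64_t>(values64.size()));
}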
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/int_util_overflow.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/int_util_overflow.h
deleted file mode 100644
index ffe78be2..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/int_util_overflow.h
+++ /dev/null
@@ -1,118 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <limits>
-#include <type_traits>
-
-#include "arrow/status.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-// "safe-math.h" includes <intsafe.h> from the Windows headers.
-#include "arrow/util/windows_compatibility.h"
-#include "arrow/vendored/portable-snippets/safe-math.h"
-// clang-format off (avoid include reordering)
-#include "arrow/util/windows_fixup.h"
-// clang-format on
-
-namespace arrow {
-namespace internal {
-
-// Define functions AddWithOverflow, SubtractWithOverflow, MultiplyWithOverflow
-// with the signature `bool(T u, T v, T* out)` where T is an integer type.
-// On overflow, these functions return true. Otherwise, false is returned
-// and `out` is updated with the result of the operation.
-
-#define OP_WITH_OVERFLOW(_func_name, _psnip_op, _type, _psnip_type)           \
-  [[nodiscard]] static inline bool _func_name(_type u, _type v, _type* out) { \
-    return !psnip_safe_##_psnip_type##_##_psnip_op(out, u, v);                \
-  }
-
-#define OPS_WITH_OVERFLOW(_func_name, _psnip_op)            \
-  OP_WITH_OVERFLOW(_func_name, _psnip_op, int8_t, int8)     \
-  OP_WITH_OVERFLOW(_func_name, _psnip_op, int16_t, int16)   \
-  OP_WITH_OVERFLOW(_func_name, _psnip_op, int32_t, int32)   \
-  OP_WITH_OVERFLOW(_func_name, _psnip_op, int64_t, int64)   \
-  OP_WITH_OVERFLOW(_func_name, _psnip_op, uint8_t, uint8)   \
-  OP_WITH_OVERFLOW(_func_name, _psnip_op, uint16_t, uint16) \
-  OP_WITH_OVERFLOW(_func_name, _psnip_op, uint32_t, uint32) \
-  OP_WITH_OVERFLOW(_func_name, _psnip_op, uint64_t, uint64)
-
-OPS_WITH_OVERFLOW(AddWithOverflow, add)
-OPS_WITH_OVERFLOW(SubtractWithOverflow, sub)
-OPS_WITH_OVERFLOW(MultiplyWithOverflow, mul)
-OPS_WITH_OVERFLOW(DivideWithOverflow, div)
-
-#undef OP_WITH_OVERFLOW
-#undef OPS_WITH_OVERFLOW
-
-// Define function NegateWithOverflow with the signature `bool(T u, T* out)`
-// where T is a signed integer type. On overflow, these functions return true.
-// Otherwise, false is returned and `out` is updated with the result of the
-// operation.
-
-#define UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, _type, _psnip_type) \
-  [[nodiscard]] static inline bool _func_name(_type u, _type* out) {      \
-    return !psnip_safe_##_psnip_type##_##_psnip_op(out, u);               \
-  }
-
-#define SIGNED_UNARY_OPS_WITH_OVERFLOW(_func_name, _psnip_op)   \
-  UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int8_t, int8)   \
-  UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int16_t, int16) \
-  UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int32_t, int32) \
-  UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int64_t, int64)
-
-SIGNED_UNARY_OPS_WITH_OVERFLOW(NegateWithOverflow, neg)
-
-#undef UNARY_OP_WITH_OVERFLOW
-#undef SIGNED_UNARY_OPS_WITH_OVERFLOW
-
-/// Signed addition with well-defined behaviour on overflow (as unsigned)
-template <typename SignedInt>
-SignedInt SafeSignedAdd(SignedInt u, SignedInt v) {
-  using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
-  return static_cast<SignedInt>(static_cast<UnsignedInt>(u) +
-                                static_cast<UnsignedInt>(v));
-}
-
-/// Signed subtraction with well-defined behaviour on overflow (as unsigned)
-template <typename SignedInt>
-SignedInt SafeSignedSubtract(SignedInt u, SignedInt v) {
-  using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
-  return static_cast<SignedInt>(static_cast<UnsignedInt>(u) -
-                                static_cast<UnsignedInt>(v));
-}
-
-/// Signed negation with well-defined behaviour on overflow (as unsigned)
-template <typename SignedInt>
-SignedInt SafeSignedNegate(SignedInt u) {
-  using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
-  return static_cast<SignedInt>(~static_cast<UnsignedInt>(u) + 1);
-}
-
-/// Signed left shift with well-defined behaviour on negative numbers or overflow
-template <typename SignedInt, typename Shift>
-SignedInt SafeLeftShift(SignedInt u, Shift shift) {
-  using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
-  return static_cast<SignedInt>(static_cast<UnsignedInt>(u) << shift);
-}
-
-}  // namespace internal
-}  // namespace arrow
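The generated operations follow the `bool(T u, T v, T* out)` convention documented above: true means overflow, false means `out` holds the result. A minimal sketch (assumes <limits> is included):

int64_t out = 0;
if (arrow::internal::AddWithOverflow(std::numeric_limits<int64_t>::max(),
                                     int64_t{1}, &out)) {
  // Overflow detected; `out` must not be used.
}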
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/io_util.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/io_util.h
deleted file mode 100644
index 113b1bdd..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/io_util.h
+++ /dev/null
@@ -1,420 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#ifndef _WIN32
-#define ARROW_HAVE_SIGACTION 1
-#endif
-
-#include <atomic>
-#include <memory>
-#include <optional>
-#include <string>
-#include <vector>
-
-#if ARROW_HAVE_SIGACTION
-#include <signal.h>  // Needed for struct sigaction
-#endif
-
-#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/windows_fixup.h"
-
-namespace arrow {
-namespace internal {
-
-// NOTE: 8-bit path strings on Windows are encoded using UTF-8.
-// Using MBCS would fail encoding some paths.
-
-#if defined(_WIN32)
-using NativePathString = std::wstring;
-#else
-using NativePathString = std::string;
-#endif
-
-class ARROW_EXPORT PlatformFilename {
- public:
-  struct Impl;
-
-  ~PlatformFilename();
-  PlatformFilename();
-  PlatformFilename(const PlatformFilename&);
-  PlatformFilename(PlatformFilename&&);
-  PlatformFilename& operator=(const PlatformFilename&);
-  PlatformFilename& operator=(PlatformFilename&&);
-  explicit PlatformFilename(NativePathString path);
-  explicit PlatformFilename(const NativePathString::value_type* path);
-
-  const NativePathString& ToNative() const;
-  std::string ToString() const;
-
-  PlatformFilename Parent() const;
-  Result<PlatformFilename> Real() const;
-
-  // These functions can fail for character encoding reasons.
-  static Result<PlatformFilename> FromString(std::string_view file_name);
-  Result<PlatformFilename> Join(std::string_view child_name) const;
-
-  PlatformFilename Join(const PlatformFilename& child_name) const;
-
-  bool operator==(const PlatformFilename& other) const;
-  bool operator!=(const PlatformFilename& other) const;
-
-  // Made public to avoid the proliferation of friend declarations.
-  const Impl* impl() const { return impl_.get(); }
-
- private:
-  std::unique_ptr<Impl> impl_;
-
-  explicit PlatformFilename(Impl impl);
-};
-
-/// Create a directory if it doesn't exist.
-///
-/// Return whether the directory was created.
-ARROW_EXPORT
-Result<bool> CreateDir(const PlatformFilename& dir_path);
-
-/// Create a directory and its parents if it doesn't exist.
-///
-/// Return whether the directory was created.
-ARROW_EXPORT
-Result<bool> CreateDirTree(const PlatformFilename& dir_path);
-
-/// Delete a directory's contents (but not the directory itself) if it exists.
-///
-/// Return whether the directory existed.
-ARROW_EXPORT
-Result<bool> DeleteDirContents(const PlatformFilename& dir_path,
-                               bool allow_not_found = true);
-
-/// Delete a directory tree if it exists.
-///
-/// Return whether the directory existed.
-ARROW_EXPORT
-Result<bool> DeleteDirTree(const PlatformFilename& dir_path, bool allow_not_found = true);
-
-// Non-recursively list the contents of the given directory.
-// The returned names are the children's base names, not including dir_path.
-ARROW_EXPORT
-Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename& dir_path);
-
-/// Delete a file if it exists.
-///
-/// Return whether the file existed.
-ARROW_EXPORT
-Result<bool> DeleteFile(const PlatformFilename& file_path, bool allow_not_found = true);
-
-/// Return whether a file exists.
-ARROW_EXPORT
-Result<bool> FileExists(const PlatformFilename& path);
-
-// TODO expose this more publicly to make it available from io/file.h?
-/// A RAII wrapper for a file descriptor.
-///
-/// The underlying file descriptor is automatically closed on destruction.
-/// Moving is supported with well-defined semantics.
-/// Furthermore, closing is idempotent.
-class ARROW_EXPORT FileDescriptor {
- public:
-  FileDescriptor() = default;
-  explicit FileDescriptor(int fd) : fd_(fd) {}
-  FileDescriptor(FileDescriptor&&);
-  FileDescriptor& operator=(FileDescriptor&&);
-
-  ~FileDescriptor();
-
-  Status Close();
-
-  /// May return -1 if closed or default-initialized
-  int fd() const { return fd_.load(); }
-
-  /// Detach and return the underlying file descriptor
-  int Detach();
-
-  bool closed() const { return fd_.load() == -1; }
-
- protected:
-  static void CloseFromDestructor(int fd);
-
-  std::atomic<int> fd_{-1};
-};
-
-/// Open a file for reading and return a file descriptor.
-ARROW_EXPORT
-Result<FileDescriptor> FileOpenReadable(const PlatformFilename& file_name);
-
-/// Open a file for writing and return a file descriptor.
-ARROW_EXPORT
-Result<FileDescriptor> FileOpenWritable(const PlatformFilename& file_name,
-                                        bool write_only = true, bool truncate = true,
-                                        bool append = false);
-
-/// Read from current file position. Return number of bytes read.
-ARROW_EXPORT
-Result<int64_t> FileRead(int fd, uint8_t* buffer, int64_t nbytes);
-/// Read from given file position. Return number of bytes read.
-ARROW_EXPORT
-Result<int64_t> FileReadAt(int fd, uint8_t* buffer, int64_t position, int64_t nbytes);
-
-ARROW_EXPORT
-Status FileWrite(int fd, const uint8_t* buffer, const int64_t nbytes);
-ARROW_EXPORT
-Status FileTruncate(int fd, const int64_t size);
-
-ARROW_EXPORT
-Status FileSeek(int fd, int64_t pos);
-ARROW_EXPORT
-Status FileSeek(int fd, int64_t pos, int whence);
-ARROW_EXPORT
-Result<int64_t> FileTell(int fd);
-ARROW_EXPORT
-Result<int64_t> FileGetSize(int fd);
-
-ARROW_EXPORT
-Status FileClose(int fd);
-
-struct Pipe {
-  FileDescriptor rfd;
-  FileDescriptor wfd;
-
-  Status Close() { return rfd.Close() & wfd.Close(); }
-};
-
-ARROW_EXPORT
-Result<Pipe> CreatePipe();
-
-ARROW_EXPORT
-Status SetPipeFileDescriptorNonBlocking(int fd);
-
-class ARROW_EXPORT SelfPipe {
- public:
-  static Result<std::shared_ptr<SelfPipe>> Make(bool signal_safe);
-  virtual ~SelfPipe();
-
-  /// \brief Wait for a wakeup.
-  ///
-  /// Status::Invalid is returned if the pipe has been shutdown.
-  /// Otherwise the next sent payload is returned.
-  virtual Result<uint64_t> Wait() = 0;
-
-  /// \brief Wake up the pipe by sending a payload.
-  ///
-  /// This method is async-signal-safe if `signal_safe` was set to true.
-  virtual void Send(uint64_t payload) = 0;
-
-  /// \brief Wake up the pipe and shut it down.
-  virtual Status Shutdown() = 0;
-};
-
-ARROW_EXPORT
-int64_t GetPageSize();
-
-struct MemoryRegion {
-  void* addr;
-  size_t size;
-};
-
-ARROW_EXPORT
-Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes,
-                      void** new_addr);
-ARROW_EXPORT
-Status MemoryAdviseWillNeed(const std::vector<MemoryRegion>& regions);
-
-ARROW_EXPORT
-Result<std::string> GetEnvVar(const char* name);
-ARROW_EXPORT
-Result<std::string> GetEnvVar(const std::string& name);
-ARROW_EXPORT
-Result<NativePathString> GetEnvVarNative(const char* name);
-ARROW_EXPORT
-Result<NativePathString> GetEnvVarNative(const std::string& name);
-
-ARROW_EXPORT
-Status SetEnvVar(const char* name, const char* value);
-ARROW_EXPORT
-Status SetEnvVar(const std::string& name, const std::string& value);
-ARROW_EXPORT
-Status DelEnvVar(const char* name);
-ARROW_EXPORT
-Status DelEnvVar(const std::string& name);
-
-ARROW_EXPORT
-std::string ErrnoMessage(int errnum);
-#if _WIN32
-ARROW_EXPORT
-std::string WinErrorMessage(int errnum);
-#endif
-
-ARROW_EXPORT
-std::shared_ptr<StatusDetail> StatusDetailFromErrno(int errnum);
-#if _WIN32
-ARROW_EXPORT
-std::shared_ptr<StatusDetail> StatusDetailFromWinError(int errnum);
-#endif
-ARROW_EXPORT
-std::shared_ptr<StatusDetail> StatusDetailFromSignal(int signum);
-
-template <typename... Args>
-Status StatusFromErrno(int errnum, StatusCode code, Args&&... args) {
-  return Status::FromDetailAndArgs(code, StatusDetailFromErrno(errnum),
-                                   std::forward<Args>(args)...);
-}
-
-template <typename... Args>
-Status IOErrorFromErrno(int errnum, Args&&... args) {
-  return StatusFromErrno(errnum, StatusCode::IOError, std::forward<Args>(args)...);
-}
-
-#if _WIN32
-template <typename... Args>
-Status StatusFromWinError(int errnum, StatusCode code, Args&&... args) {
-  return Status::FromDetailAndArgs(code, StatusDetailFromWinError(errnum),
-                                   std::forward<Args>(args)...);
-}
-
-template <typename... Args>
-Status IOErrorFromWinError(int errnum, Args&&... args) {
-  return StatusFromWinError(errnum, StatusCode::IOError, std::forward<Args>(args)...);
-}
-#endif
-
-template <typename... Args>
-Status StatusFromSignal(int signum, StatusCode code, Args&&... args) {
-  return Status::FromDetailAndArgs(code, StatusDetailFromSignal(signum),
-                                   std::forward<Args>(args)...);
-}
-
-template <typename... Args>
-Status CancelledFromSignal(int signum, Args&&... args) {
-  return StatusFromSignal(signum, StatusCode::Cancelled, std::forward<Args>(args)...);
-}
-
-ARROW_EXPORT
-int ErrnoFromStatus(const Status&);
-
-// Always returns 0 on non-Windows platforms (for Python).
-ARROW_EXPORT
-int WinErrorFromStatus(const Status&);
-
-ARROW_EXPORT
-int SignalFromStatus(const Status&);
-
-class ARROW_EXPORT TemporaryDir {
- public:
-  ~TemporaryDir();
-
-  /// '/'-terminated path to the temporary dir
-  const PlatformFilename& path() { return path_; }
-
-  /// Create a temporary subdirectory in the system temporary dir,
-  /// named starting with `prefix`.
-  static Result<std::unique_ptr<TemporaryDir>> Make(const std::string& prefix);
-
- private:
-  PlatformFilename path_;
-
-  explicit TemporaryDir(PlatformFilename&&);
-};
-
-class ARROW_EXPORT SignalHandler {
- public:
-  typedef void (*Callback)(int);
-
-  SignalHandler();
-  explicit SignalHandler(Callback cb);
-#if ARROW_HAVE_SIGACTION
-  explicit SignalHandler(const struct sigaction& sa);
-#endif
-
-  Callback callback() const;
-#if ARROW_HAVE_SIGACTION
-  const struct sigaction& action() const;
-#endif
-
- protected:
-#if ARROW_HAVE_SIGACTION
-  // Storing the full sigaction allows to restore the entire signal handling
-  // configuration.
-  struct sigaction sa_;
-#else
-  Callback cb_;
-#endif
-};
-
-/// \brief Return the current handler for the given signal number.
-ARROW_EXPORT
-Result<SignalHandler> GetSignalHandler(int signum);
-
-/// \brief Set a new handler for the given signal number.
-///
-/// The old signal handler is returned.
-ARROW_EXPORT
-Result<SignalHandler> SetSignalHandler(int signum, const SignalHandler& handler);
-
-/// \brief Reinstate the signal handler
-///
-/// For use in signal handlers. This is needed on platforms without sigaction()
-/// such as Windows, as the default signal handler is restored there as
-/// soon as a signal is raised.
-ARROW_EXPORT
-void ReinstateSignalHandler(int signum, SignalHandler::Callback handler);
-
-/// \brief Send a signal to the current process
-///
-/// The thread which will receive the signal is unspecified.
-ARROW_EXPORT
-Status SendSignal(int signum);
-
-/// \brief Send a signal to the given thread
-///
-/// This function isn't supported on Windows.
-ARROW_EXPORT
-Status SendSignalToThread(int signum, uint64_t thread_id);
-
-/// \brief Get an unpredictable random seed
-///
-/// This function may be slightly costly, so should only be used to initialize
-/// a PRNG, not to generate a large amount of random numbers.
-/// It is better to use this function rather than std::random_device, unless
-/// absolutely necessary (e.g. to generate a cryptographic secret).
-ARROW_EXPORT
-int64_t GetRandomSeed();
-
-/// \brief Get the current thread id
-///
-/// In addition to having the same properties as std::thread, the returned value
-/// is a regular integer value, which is more convenient than an opaque type.
-ARROW_EXPORT
-uint64_t GetThreadId();
-
-/// \brief Get the current memory used by the current process in bytes
-///
-/// This function supports Windows, Linux, and Mac and will return 0 otherwise
-ARROW_EXPORT
-int64_t GetCurrentRSS();
-
-/// \brief Get the total memory available to the system in bytes
-///
-/// This function supports Windows, Linux, and Mac and will return 0 otherwise
-ARROW_EXPORT
-int64_t GetTotalMemoryBytes();
-
-}  // namespace internal
-}  // namespace arrow
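Illustrative use of the deleted io_util helpers (a sketch under the assumption that `buffer` and `nbytes` exist in a caller returning arrow::Status):

ARROW_ASSIGN_OR_RAISE(auto tmp_dir, arrow::internal::TemporaryDir::Make("csp-"));
ARROW_ASSIGN_OR_RAISE(auto path, tmp_dir->path().Join("data.bin"));
ARROW_ASSIGN_OR_RAISE(arrow::internal::FileDescriptor fd,
                      arrow::internal::FileOpenWritable(path));
ARROW_RETURN_NOT_OK(arrow::internal::FileWrite(fd.fd(), buffer, nbytes));
ARROW_RETURN_NOT_OK(fd.Close());  // idempotent; also closed by the destructor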
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/iterator.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/iterator.h
deleted file mode 100644
index 5e716d0f..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/iterator.h
+++ /dev/null
@@ -1,568 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cassert>
-#include <functional>
-#include <memory>
-#include <optional>
-#include <tuple>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/compare.h"
-#include "arrow/util/functional.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-template <typename T>
-class Iterator;
-
-template <typename T>
-struct IterationTraits {
-  /// \brief a reserved value which indicates the end of iteration. By
-  /// default this is NULLPTR since most iterators yield pointer types.
-  /// Specialize IterationTraits if different end semantics are required.
-  ///
-  /// Note: This should not be used to determine if a given value is a
-  /// terminal value. Use IsIterationEnd (which uses IsEnd) instead. This
-  /// is only for returning terminal values.
-  static T End() { return T(NULLPTR); }
-
-  /// \brief Checks to see if the value is a terminal value.
-  /// A method is used here since T is not necessarily comparable in many
-  /// cases even though it has a distinct final value
-  static bool IsEnd(const T& val) { return val == End(); }
-};
-
-template <typename T>
-T IterationEnd() {
-  return IterationTraits<T>::End();
-}
-
-template <typename T>
-bool IsIterationEnd(const T& val) {
-  return IterationTraits<T>::IsEnd(val);
-}
-
-template <typename T>
-struct IterationTraits<std::optional<T>> {
-  /// \brief by default when iterating through a sequence of optional,
-  /// nullopt indicates the end of iteration.
-  /// Specialize IterationTraits if different end semantics are required.
-  static std::optional<T> End() { return std::nullopt; }
-
-  /// \brief by default when iterating through a sequence of optional,
-  /// nullopt (!has_value()) indicates the end of iteration.
-  /// Specialize IterationTraits if different end semantics are required.
-  static bool IsEnd(const std::optional<T>& val) { return !val.has_value(); }
-
-  // TODO(bkietz) The range-for loop over Iterator<optional<T>> yields
-  // Result<optional<T>> which is unnecessary (since only the unyielded end optional
-  // is nullopt. Add IterationTraits::GetRangeElement() to handle this case
-};
-
-/// \brief A generic Iterator that can return errors
-template <typename T>
-class Iterator : public util::EqualityComparable<Iterator<T>> {
- public:
-  /// \brief Iterator may be constructed from any type which has a member function
-  /// with signature Result<T> Next();
-  /// End of iterator is signalled by returning IteratorTraits<T>::End();
-  ///
-  /// The argument is moved or copied to the heap and kept in a unique_ptr<void>. Only
-  /// its destructor and its Next method (which are stored in function pointers) are
-  /// referenced after construction.
-  ///
-  /// This approach is used to dodge MSVC linkage hell (ARROW-6244, ARROW-6558) when using
-  /// an abstract template base class: instead of being inlined as usual for a template
-  /// function the base's virtual destructor will be exported, leading to multiple
-  /// definition errors when linking to any other TU where the base is instantiated.
-  template <typename Wrapped>
-  explicit Iterator(Wrapped has_next)
-      : ptr_(new Wrapped(std::move(has_next)), Delete<Wrapped>), next_(Next<Wrapped>) {}
-
-  Iterator() : ptr_(NULLPTR, [](void*) {}) {}
-
-  /// \brief Return the next element of the sequence, IterationTraits<T>::End() when the
-  /// iteration is completed. Calling this on a default constructed Iterator
-  /// will result in undefined behavior.
-  Result<T> Next() { return next_(ptr_.get()); }
-
-  /// Pass each element of the sequence to a visitor. Will return any error status
-  /// returned by the visitor, terminating iteration.
-  template <typename Visitor>
-  Status Visit(Visitor&& visitor) {
-    for (;;) {
-      ARROW_ASSIGN_OR_RAISE(auto value, Next());
-
-      if (IsIterationEnd(value)) break;
-
-      ARROW_RETURN_NOT_OK(visitor(std::move(value)));
-    }
-
-    return Status::OK();
-  }
-
-  /// Iterators will only compare equal if they are both null.
-  /// Equality comparability is required to make an Iterator of Iterators
-  /// (to check for the end condition).
-  bool Equals(const Iterator& other) const { return ptr_ == other.ptr_; }
-
-  explicit operator bool() const { return ptr_ != NULLPTR; }
-
-  class RangeIterator {
-   public:
-    RangeIterator() : value_(IterationTraits<T>::End()) {}
-
-    explicit RangeIterator(Iterator i)
-        : value_(IterationTraits<T>::End()),
-          iterator_(std::make_shared<Iterator>(std::move(i))) {
-      Next();
-    }
-
-    bool operator!=(const RangeIterator& other) const { return value_ != other.value_; }
-
-    RangeIterator& operator++() {
-      Next();
-      return *this;
-    }
-
-    Result<T> operator*() {
-      ARROW_RETURN_NOT_OK(value_.status());
-
-      auto value = std::move(value_);
-      value_ = IterationTraits<T>::End();
-      return value;
-    }
-
-   private:
-    void Next() {
-      if (!value_.ok()) {
-        value_ = IterationTraits<T>::End();
-        return;
-      }
-      value_ = iterator_->Next();
-    }
-
-    Result<T> value_;
-    std::shared_ptr<Iterator> iterator_;
-  };
-
-  RangeIterator begin() { return RangeIterator(std::move(*this)); }
-
-  RangeIterator end() { return RangeIterator(); }
-
-  /// \brief Move every element of this iterator into a vector.
-  Result<std::vector<T>> ToVector() {
-    std::vector<T> out;
-    for (auto maybe_element : *this) {
-      ARROW_ASSIGN_OR_RAISE(auto element, maybe_element);
-      out.push_back(std::move(element));
-    }
-    // ARROW-8193: On gcc-4.8 without the explicit move it tries to use the
-    // copy constructor, which may be deleted on the elements of type T
-    return std::move(out);
-  }
-
- private:
-  /// Implementation of deleter for ptr_: Casts from void* to the wrapped type and
-  /// deletes that.
-  template <typename HasNext>
-  static void Delete(void* ptr) {
-    delete static_cast<HasNext*>(ptr);
-  }
-
-  /// Implementation of Next: Casts from void* to the wrapped type and invokes that
-  /// type's Next member function.
-  template <typename HasNext>
-  static Result<T> Next(void* ptr) {
-    return static_cast<HasNext*>(ptr)->Next();
-  }
-
-  /// ptr_ is a unique_ptr to void with a custom deleter: a function pointer which first
-  /// casts from void* to a pointer to the wrapped type then deletes that.
-  std::unique_ptr<void, void (*)(void*)> ptr_;
-
-  /// next_ is a function pointer which first casts from void* to a pointer to the wrapped
-  /// type then invokes its Next member function.
-  Result<T> (*next_)(void*) = NULLPTR;
-};
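The range-for support defined above yields a Result<T> per element. A short sketch using MakeVectorIterator (declared further down in this header) with std::optional<int>, whose IterationTraits specialization uses nullopt as the end marker:

auto it = arrow::MakeVectorIterator(std::vector<std::optional<int>>{1, 2, 3});
for (auto maybe_v : it) {
  ARROW_ASSIGN_OR_RAISE(std::optional<int> v, maybe_v);
  // use *v
}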
-
-template <typename T>
-struct TransformFlow {
-  using YieldValueType = T;
-
-  TransformFlow(YieldValueType value, bool ready_for_next)
-      : finished_(false),
-        ready_for_next_(ready_for_next),
-        yield_value_(std::move(value)) {}
-  TransformFlow(bool finished, bool ready_for_next)
-      : finished_(finished), ready_for_next_(ready_for_next), yield_value_() {}
-
-  bool HasValue() const { return yield_value_.has_value(); }
-  bool Finished() const { return finished_; }
-  bool ReadyForNext() const { return ready_for_next_; }
-  T Value() const { return *yield_value_; }
-
-  bool finished_ = false;
-  bool ready_for_next_ = false;
-  std::optional<YieldValueType> yield_value_;
-};
-
-struct TransformFinish {
-  template <typename T>
-  operator TransformFlow<T>() && {  // NOLINT explicit
-    return TransformFlow<T>(true, true);
-  }
-};
-
-struct TransformSkip {
-  template <typename T>
-  operator TransformFlow<T>() && {  // NOLINT explicit
-    return TransformFlow<T>(false, true);
-  }
-};
-
-template <typename T>
-TransformFlow<T> TransformYield(T value = {}, bool ready_for_next = true) {
-  return TransformFlow<T>(std::move(value), ready_for_next);
-}
-
-template <typename T, typename V>
-using Transformer = std::function<Result<TransformFlow<V>>(T)>;
-
-template <typename T, typename V>
-class TransformIterator {
- public:
-  explicit TransformIterator(Iterator<T> it, Transformer<T, V> transformer)
-      : it_(std::move(it)),
-        transformer_(std::move(transformer)),
-        last_value_(),
-        finished_() {}
-
-  Result<V> Next() {
-    while (!finished_) {
-      ARROW_ASSIGN_OR_RAISE(std::optional<V> next, Pump());
-      if (next.has_value()) {
-        return std::move(*next);
-      }
-      ARROW_ASSIGN_OR_RAISE(last_value_, it_.Next());
-    }
-    return IterationTraits<V>::End();
-  }
-
- private:
-  // Calls the transform function on the current value. Can return in several ways
-  // * If the next value is requested (e.g. skip) it will return an empty optional
-  // * If an invalid status is encountered that will be returned
-  // * If finished it will return IterationTraits<V>::End()
-  // * If a value is returned by the transformer that will be returned
-  Result<std::optional<V>> Pump() {
-    if (!finished_ && last_value_.has_value()) {
-      auto next_res = transformer_(*last_value_);
-      if (!next_res.ok()) {
-        finished_ = true;
-        return next_res.status();
-      }
-      auto next = *next_res;
-      if (next.ReadyForNext()) {
-        if (IsIterationEnd(*last_value_)) {
-          finished_ = true;
-        }
-        last_value_.reset();
-      }
-      if (next.Finished()) {
-        finished_ = true;
-      }
-      if (next.HasValue()) {
-        return next.Value();
-      }
-    }
-    if (finished_) {
-      return IterationTraits<V>::End();
-    }
-    return std::nullopt;
-  }
-
-  Iterator<T> it_;
-  Transformer<T, V> transformer_;
-  std::optional<T> last_value_;
-  bool finished_ = false;
-};
-
-/// \brief Transforms an iterator according to a transformer, returning a new Iterator.
-///
-/// The transformer will be called on each element of the source iterator and for each
-/// call it can yield a value, skip, or finish the iteration. When yielding a value the
-/// transformer can choose to consume the source item (the default, ready_for_next = true)
-/// or to keep it and it will be called again on the same value.
-///
-/// This is essentially a more generic form of the map operation that can return 0, 1, or
-/// many values for each of the source items.
-///
-/// The transformer will be exposed to the end of the source sequence
-/// (IterationTraits::End) in case it needs to return some penultimate item(s).
-///
-/// Any invalid status returned by the transformer will be returned immediately.
-template <typename T, typename V>
-Iterator<V> MakeTransformedIterator(Iterator<T> it, Transformer<T, V> op) {
-  return Iterator<V>(TransformIterator<T, V>(std::move(it), std::move(op)));
-}
-
-template <typename T>
-struct IterationTraits<Iterator<T>> {
-  // The end condition for an Iterator of Iterators is a default constructed (null)
-  // Iterator.
-  static Iterator<T> End() { return Iterator<T>(); }
-  static bool IsEnd(const Iterator<T>& val) { return !val; }
-};
-
-template <typename Fn, typename T>
-class FunctionIterator {
- public:
-  explicit FunctionIterator(Fn fn) : fn_(std::move(fn)) {}
-
-  Result<T> Next() { return fn_(); }
-
- private:
-  Fn fn_;
-};
-
-/// \brief Construct an Iterator which invokes a callable on Next()
-template <typename Fn,
-          typename T = typename internal::call_traits::return_type<Fn>::ValueType>
-Iterator<T> MakeFunctionIterator(Fn fn) {
-  return Iterator<T>(FunctionIterator<Fn, T>(std::move(fn)));
-}
-
-template <typename T>
-Iterator<T> MakeEmptyIterator() {
-  return MakeFunctionIterator([]() -> Result<T> { return IterationTraits<T>::End(); });
-}
-
-template <typename T>
-Iterator<T> MakeErrorIterator(Status s) {
-  return MakeFunctionIterator([s]() -> Result<T> {
-    ARROW_RETURN_NOT_OK(s);
-    return IterationTraits<T>::End();
-  });
-}
-
-/// \brief Simple iterator which yields the elements of a std::vector
-template <typename T>
-class VectorIterator {
- public:
-  explicit VectorIterator(std::vector<T> v) : elements_(std::move(v)) {}
-
-  Result<T> Next() {
-    if (i_ == elements_.size()) {
-      return IterationTraits<T>::End();
-    }
-    return std::move(elements_[i_++]);
-  }
-
- private:
-  std::vector<T> elements_;
-  size_t i_ = 0;
-};
-
-template <typename T>
-Iterator<T> MakeVectorIterator(std::vector<T> v) {
-  return Iterator<T>(VectorIterator<T>(std::move(v)));
-}
-
-/// \brief Simple iterator which yields *pointers* to the elements of a std::vector<T>.
-/// This is provided to support T where IterationTraits<T>::End is not specialized
-template <typename T>
-class VectorPointingIterator {
- public:
-  explicit VectorPointingIterator(std::vector<T> v) : elements_(std::move(v)) {}
-
-  Result<T*> Next() {
-    if (i_ == elements_.size()) {
-      return NULLPTR;
-    }
-    return &elements_[i_++];
-  }
-
- private:
-  std::vector<T> elements_;
-  size_t i_ = 0;
-};
-
-template <typename T>
-Iterator<T*> MakeVectorPointingIterator(std::vector<T> v) {
-  return Iterator<T*>(VectorPointingIterator<T>(std::move(v)));
-}
-
-/// \brief MapIterator takes ownership of an iterator and a function to apply
-/// on every element. The mapped function is not allowed to fail.
-template <typename Fn, typename I, typename O>
-class MapIterator {
- public:
-  explicit MapIterator(Fn map, Iterator<I> it)
-      : map_(std::move(map)), it_(std::move(it)) {}
-
-  Result<O> Next() {
-    ARROW_ASSIGN_OR_RAISE(I i, it_.Next());
-
-    if (IsIterationEnd(i)) {
-      return IterationTraits<O>::End();
-    }
-
-    return map_(std::move(i));
-  }
-
- private:
-  Fn map_;
-  Iterator<I> it_;
-};
-
-/// \brief MapIterator takes ownership of an iterator and a function to apply
-/// on every element. The mapped function is not allowed to fail.
-template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
-          typename To = internal::call_traits::return_type<Fn>>
-Iterator<To> MakeMapIterator(Fn map, Iterator<From> it) {
-  return Iterator<To>(MapIterator<Fn, From, To>(std::move(map), std::move(it)));
-}
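A sketch of a Transformer (my example, not from the PR): keep even values and stop early. Each call yields, skips, or finishes, matching the doc comment on MakeTransformedIterator above; std::optional<int> is used so end-of-iteration is well defined.

using OptInt = std::optional<int>;
arrow::Transformer<OptInt, OptInt> keep_even =
    [](OptInt v) -> arrow::Result<arrow::TransformFlow<OptInt>> {
  if (!v.has_value()) return arrow::TransformFinish();  // end of source reached
  if (*v % 2 != 0) return arrow::TransformSkip();
  return arrow::TransformYield(v);
};
auto evens = arrow::MakeTransformedIterator(
    arrow::MakeVectorIterator(std::vector<OptInt>{1, 2, 3, 4}), keep_even);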
-template , - typename To = typename internal::call_traits::return_type::ValueType> -Iterator MakeMaybeMapIterator(Fn map, Iterator it) { - return Iterator(MapIterator(std::move(map), std::move(it))); -} - -struct FilterIterator { - enum Action { ACCEPT, REJECT }; - - template - static Result> Reject() { - return std::make_pair(IterationTraits::End(), REJECT); - } - - template - static Result> Accept(To out) { - return std::make_pair(std::move(out), ACCEPT); - } - - template - static Result> MaybeAccept(Result maybe_out) { - return std::move(maybe_out).Map(Accept); - } - - template - static Result> Error(Status s) { - return s; - } - - template - class Impl { - public: - explicit Impl(Fn filter, Iterator it) : filter_(filter), it_(std::move(it)) {} - - Result Next() { - To out = IterationTraits::End(); - Action action; - - for (;;) { - ARROW_ASSIGN_OR_RAISE(From i, it_.Next()); - - if (IsIterationEnd(i)) { - return IterationTraits::End(); - } - - ARROW_ASSIGN_OR_RAISE(std::tie(out, action), filter_(std::move(i))); - - if (action == ACCEPT) return out; - } - } - - private: - Fn filter_; - Iterator it_; - }; -}; - -/// \brief Like MapIterator, but where the function can fail or reject elements. -template < - typename Fn, typename From = typename internal::call_traits::argument_type<0, Fn>, - typename Ret = typename internal::call_traits::return_type::ValueType, - typename To = typename std::tuple_element<0, Ret>::type, - typename Enable = typename std::enable_if::type, FilterIterator::Action>::value>::type> -Iterator MakeFilterIterator(Fn filter, Iterator it) { - return Iterator( - FilterIterator::Impl(std::move(filter), std::move(it))); -} - -/// \brief FlattenIterator takes an iterator generating iterators and yields a -/// unified iterator that flattens/concatenates in a single stream. -template -class FlattenIterator { - public: - explicit FlattenIterator(Iterator> it) : parent_(std::move(it)) {} - - Result Next() { - if (IsIterationEnd(child_)) { - // Pop from parent's iterator. - ARROW_ASSIGN_OR_RAISE(child_, parent_.Next()); - - // Check if final iteration reached. - if (IsIterationEnd(child_)) { - return IterationTraits::End(); - } - - return Next(); - } - - // Pop from child_ and check for depletion. - ARROW_ASSIGN_OR_RAISE(T out, child_.Next()); - if (IsIterationEnd(out)) { - // Reset state such that we pop from parent on the recursive call - child_ = IterationTraits>::End(); - - return Next(); - } - - return out; - } - - private: - Iterator> parent_; - Iterator child_ = IterationTraits>::End(); -}; - -template -Iterator MakeFlattenIterator(Iterator> it) { - return Iterator(FlattenIterator(std::move(it))); -} - -template -Iterator MakeIteratorFromReader( - const std::shared_ptr& reader) { - return MakeFunctionIterator([reader] { return reader->Next(); }); -} - -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/key_value_metadata.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/key_value_metadata.h deleted file mode 100644 index 8702ce73..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/key_value_metadata.h +++ /dev/null @@ -1,98 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/key_value_metadata.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/key_value_metadata.h
deleted file mode 100644
index 8702ce73..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/key_value_metadata.h
+++ /dev/null
@@ -1,98 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-/// \brief A container for key-value pair type metadata. Not thread-safe
-class ARROW_EXPORT KeyValueMetadata {
- public:
-  KeyValueMetadata();
-  KeyValueMetadata(std::vector<std::string> keys, std::vector<std::string> values);
-  explicit KeyValueMetadata(const std::unordered_map<std::string, std::string>& map);
-
-  static std::shared_ptr<KeyValueMetadata> Make(std::vector<std::string> keys,
-                                                std::vector<std::string> values);
-
-  void ToUnorderedMap(std::unordered_map<std::string, std::string>* out) const;
-  void Append(std::string key, std::string value);
-
-  Result<std::string> Get(const std::string& key) const;
-  bool Contains(const std::string& key) const;
-  // Note that deleting may invalidate known indices
-  Status Delete(const std::string& key);
-  Status Delete(int64_t index);
-  Status DeleteMany(std::vector<int64_t> indices);
-  Status Set(const std::string& key, const std::string& value);
-
-  void reserve(int64_t n);
-
-  int64_t size() const;
-  const std::string& key(int64_t i) const;
-  const std::string& value(int64_t i) const;
-  const std::vector<std::string>& keys() const { return keys_; }
-  const std::vector<std::string>& values() const { return values_; }
-
-  std::vector<std::pair<std::string, std::string>> sorted_pairs() const;
-
-  /// \brief Perform linear search for key, returning -1 if not found
-  int FindKey(const std::string& key) const;
-
-  std::shared_ptr<KeyValueMetadata> Copy() const;
-
-  /// \brief Return a new KeyValueMetadata by combining the passed metadata
-  /// with this KeyValueMetadata. Colliding keys will be overridden by the
-  /// passed metadata. Assumes keys in both containers are unique
-  std::shared_ptr<KeyValueMetadata> Merge(const KeyValueMetadata& other) const;
-
-  bool Equals(const KeyValueMetadata& other) const;
-  std::string ToString() const;
-
- private:
-  std::vector<std::string> keys_;
-  std::vector<std::string> values_;
-
-  ARROW_DISALLOW_COPY_AND_ASSIGN(KeyValueMetadata);
-};
-
-/// \brief Create a KeyValueMetadata instance
-///
-/// \param pairs key-value mapping
-ARROW_EXPORT std::shared_ptr<KeyValueMetadata> key_value_metadata(
-    const std::unordered_map<std::string, std::string>& pairs);
-
-/// \brief Create a KeyValueMetadata instance
-///
-/// \param keys sequence of metadata keys
-/// \param values sequence of corresponding metadata values
-ARROW_EXPORT std::shared_ptr<KeyValueMetadata> key_value_metadata(
-    std::vector<std::string> keys, std::vector<std::string> values);
-
-}  // namespace arrow
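A minimal sketch of the API deleted above: build a metadata container and look up a key (Get returns a Result that fails if the key is absent).

auto md = arrow::KeyValueMetadata::Make({"origin", "format"}, {"csp", "parquet"});
ARROW_ASSIGN_OR_RAISE(std::string origin, md->Get("origin"));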
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/launder.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/launder.h
deleted file mode 100644
index 9e4533c4..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/launder.h
+++ /dev/null
@@ -1,35 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <new>
-
-namespace arrow {
-namespace internal {
-
-#if __cpp_lib_launder
-using std::launder;
-#else
-template <class T>
-constexpr T* launder(T* p) noexcept {
-  return p;
-}
-#endif
-
-}  // namespace internal
-}  // namespace arrow
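The polyfill above forwards to std::launder when the standard library provides it and is an identity function otherwise. A sketch of the usual call shape, obtaining a usable pointer to an object constructed in raw storage (Widget is a hypothetical type; <new> is assumed included for placement new):

struct Widget { int x; };
alignas(Widget) unsigned char storage[sizeof(Widget)];
new (storage) Widget{1};
Widget* w = arrow::internal::launder(reinterpret_cast<Widget*>(storage));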
-/// -/// \param input The input array such that is_var_length_list_like(input.type) -/// is true -/// \return A pair of (offset, length) describing the range -ARROW_EXPORT Result> RangeOfValuesUsed( - const ArraySpan& input); - -/// \brief Calculate the sum of the sizes of all valid lists or list-views -/// -/// This is usually the same as the length of the RangeOfValuesUsed() range, but -/// it can be: -/// - Smaller: when the child array contains many values that are not -/// referenced by the lists or list-views in the parent array -/// - Greater: when the list-views share child array ranges -/// -/// \param input The input array such that is_var_length_list_like(input.type) -/// is true -/// \return The sum of all list or list-view sizes -ARROW_EXPORT Result SumOfLogicalListSizes(const ArraySpan& input); - -} // namespace internal - -} // namespace list_util -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/logging.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/logging.h deleted file mode 100644 index 2baa5605..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/logging.h +++ /dev/null @@ -1,259 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#ifdef GANDIVA_IR - -// The LLVM IR code doesn't have an NDEBUG mode. And, it shouldn't include references to -// streams or stdc++. So, making the DCHECK calls void in that case. - -#define ARROW_IGNORE_EXPR(expr) ((void)(expr)) - -#define DCHECK(condition) ARROW_IGNORE_EXPR(condition) -#define DCHECK_OK(status) ARROW_IGNORE_EXPR(status) -#define DCHECK_EQ(val1, val2) ARROW_IGNORE_EXPR(val1) -#define DCHECK_NE(val1, val2) ARROW_IGNORE_EXPR(val1) -#define DCHECK_LE(val1, val2) ARROW_IGNORE_EXPR(val1) -#define DCHECK_LT(val1, val2) ARROW_IGNORE_EXPR(val1) -#define DCHECK_GE(val1, val2) ARROW_IGNORE_EXPR(val1) -#define DCHECK_GT(val1, val2) ARROW_IGNORE_EXPR(val1) - -#else // !GANDIVA_IR - -#include -#include -#include - -#include "arrow/util/macros.h" -#include "arrow/util/visibility.h" - -namespace arrow { -namespace util { - -enum class ArrowLogLevel : int { - ARROW_DEBUG = -1, - ARROW_INFO = 0, - ARROW_WARNING = 1, - ARROW_ERROR = 2, - ARROW_FATAL = 3 -}; - -#define ARROW_LOG_INTERNAL(level) ::arrow::util::ArrowLog(__FILE__, __LINE__, level) -#define ARROW_LOG(level) ARROW_LOG_INTERNAL(::arrow::util::ArrowLogLevel::ARROW_##level) - -#define ARROW_IGNORE_EXPR(expr) ((void)(expr)) - -#define ARROW_CHECK_OR_LOG(condition, level) \ - ARROW_PREDICT_TRUE(condition) \ - ? 
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/logging.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/logging.h
deleted file mode 100644
index 2baa5605..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/logging.h
+++ /dev/null
@@ -1,259 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#ifdef GANDIVA_IR
-
-// The LLVM IR code doesn't have an NDEBUG mode. And, it shouldn't include references to
-// streams or stdc++. So, making the DCHECK calls void in that case.
-
-#define ARROW_IGNORE_EXPR(expr) ((void)(expr))
-
-#define DCHECK(condition) ARROW_IGNORE_EXPR(condition)
-#define DCHECK_OK(status) ARROW_IGNORE_EXPR(status)
-#define DCHECK_EQ(val1, val2) ARROW_IGNORE_EXPR(val1)
-#define DCHECK_NE(val1, val2) ARROW_IGNORE_EXPR(val1)
-#define DCHECK_LE(val1, val2) ARROW_IGNORE_EXPR(val1)
-#define DCHECK_LT(val1, val2) ARROW_IGNORE_EXPR(val1)
-#define DCHECK_GE(val1, val2) ARROW_IGNORE_EXPR(val1)
-#define DCHECK_GT(val1, val2) ARROW_IGNORE_EXPR(val1)
-
-#else  // !GANDIVA_IR
-
-#include <memory>
-#include <ostream>
-#include <string>
-
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace util {
-
-enum class ArrowLogLevel : int {
-  ARROW_DEBUG = -1,
-  ARROW_INFO = 0,
-  ARROW_WARNING = 1,
-  ARROW_ERROR = 2,
-  ARROW_FATAL = 3
-};
-
-#define ARROW_LOG_INTERNAL(level) ::arrow::util::ArrowLog(__FILE__, __LINE__, level)
-#define ARROW_LOG(level) ARROW_LOG_INTERNAL(::arrow::util::ArrowLogLevel::ARROW_##level)
-
-#define ARROW_IGNORE_EXPR(expr) ((void)(expr))
-
-#define ARROW_CHECK_OR_LOG(condition, level) \
-  ARROW_PREDICT_TRUE(condition)              \
-  ? ARROW_IGNORE_EXPR(0)                     \
-  : ::arrow::util::Voidify() & ARROW_LOG(level) << " Check failed: " #condition " "
-
-#define ARROW_CHECK(condition) ARROW_CHECK_OR_LOG(condition, FATAL)
-
-// If 'to_call' returns a bad status, CHECK immediately with a logged message
-// of 'msg' followed by the status.
-#define ARROW_CHECK_OK_PREPEND(to_call, msg, level)                 \
-  do {                                                              \
-    ::arrow::Status _s = (to_call);                                 \
-    ARROW_CHECK_OR_LOG(_s.ok(), level)                              \
-        << "Operation failed: " << ARROW_STRINGIFY(to_call) << "\n" \
-        << (msg) << ": " << _s.ToString();                          \
-  } while (false)
-
-// If the status is bad, CHECK immediately, appending the status to the
-// logged message.
-#define ARROW_CHECK_OK(s) ARROW_CHECK_OK_PREPEND(s, "Bad status", FATAL)
-
-#define ARROW_CHECK_EQ(val1, val2) ARROW_CHECK((val1) == (val2))
-#define ARROW_CHECK_NE(val1, val2) ARROW_CHECK((val1) != (val2))
-#define ARROW_CHECK_LE(val1, val2) ARROW_CHECK((val1) <= (val2))
-#define ARROW_CHECK_LT(val1, val2) ARROW_CHECK((val1) < (val2))
-#define ARROW_CHECK_GE(val1, val2) ARROW_CHECK((val1) >= (val2))
-#define ARROW_CHECK_GT(val1, val2) ARROW_CHECK((val1) > (val2))
-
-#ifdef NDEBUG
-#define ARROW_DFATAL ::arrow::util::ArrowLogLevel::ARROW_WARNING
-
-// CAUTION: DCHECK_OK() always evaluates its argument, but other DCHECK*() macros
-// only do so in debug mode.
-
-#define ARROW_DCHECK(condition)               \
-  while (false) ARROW_IGNORE_EXPR(condition); \
-  while (false) ::arrow::util::detail::NullLog()
-#define ARROW_DCHECK_OK(s) \
-  ARROW_IGNORE_EXPR(s);    \
-  while (false) ::arrow::util::detail::NullLog()
-#define ARROW_DCHECK_EQ(val1, val2)      \
-  while (false) ARROW_IGNORE_EXPR(val1); \
-  while (false) ARROW_IGNORE_EXPR(val2); \
-  while (false) ::arrow::util::detail::NullLog()
-#define ARROW_DCHECK_NE(val1, val2)      \
-  while (false) ARROW_IGNORE_EXPR(val1); \
-  while (false) ARROW_IGNORE_EXPR(val2); \
-  while (false) ::arrow::util::detail::NullLog()
-#define ARROW_DCHECK_LE(val1, val2)      \
-  while (false) ARROW_IGNORE_EXPR(val1); \
-  while (false) ARROW_IGNORE_EXPR(val2); \
-  while (false) ::arrow::util::detail::NullLog()
-#define ARROW_DCHECK_LT(val1, val2)      \
-  while (false) ARROW_IGNORE_EXPR(val1); \
-  while (false) ARROW_IGNORE_EXPR(val2); \
-  while (false) ::arrow::util::detail::NullLog()
-#define ARROW_DCHECK_GE(val1, val2)      \
-  while (false) ARROW_IGNORE_EXPR(val1); \
-  while (false) ARROW_IGNORE_EXPR(val2); \
-  while (false) ::arrow::util::detail::NullLog()
-#define ARROW_DCHECK_GT(val1, val2)      \
-  while (false) ARROW_IGNORE_EXPR(val1); \
-  while (false) ARROW_IGNORE_EXPR(val2); \
-  while (false) ::arrow::util::detail::NullLog()
-
-#else
-#define ARROW_DFATAL ::arrow::util::ArrowLogLevel::ARROW_FATAL
-
-#define ARROW_DCHECK ARROW_CHECK
-#define ARROW_DCHECK_OK ARROW_CHECK_OK
-#define ARROW_DCHECK_EQ ARROW_CHECK_EQ
-#define ARROW_DCHECK_NE ARROW_CHECK_NE
-#define ARROW_DCHECK_LE ARROW_CHECK_LE
-#define ARROW_DCHECK_LT ARROW_CHECK_LT
-#define ARROW_DCHECK_GE ARROW_CHECK_GE
-#define ARROW_DCHECK_GT ARROW_CHECK_GT
-
-#endif  // NDEBUG
-
-#define DCHECK ARROW_DCHECK
-#define DCHECK_OK ARROW_DCHECK_OK
-#define DCHECK_EQ ARROW_DCHECK_EQ
-#define DCHECK_NE ARROW_DCHECK_NE
-#define DCHECK_LE ARROW_DCHECK_LE
-#define DCHECK_LT ARROW_DCHECK_LT
-#define DCHECK_GE ARROW_DCHECK_GE
-#define DCHECK_GT ARROW_DCHECK_GT
-
-// This code is adapted from
-// https://github.com/ray-project/ray/blob/master/src/ray/util/logging.h.
-
-// To make the logging lib pluggable with other logging libs and make
-// the implementation unawared by the user, ArrowLog is only a declaration
-// which hide the implementation into logging.cc file.
-// In logging.cc, we can choose different log libs using different macros.
-
-// This is also a null log which does not output anything.
-class ARROW_EXPORT ArrowLogBase {
- public:
-  virtual ~ArrowLogBase() {}
-
-  virtual bool IsEnabled() const { return false; }
-
-  template <typename T>
-  ArrowLogBase& operator<<(const T& t) {
-    if (IsEnabled()) {
-      Stream() << t;
-    }
-    return *this;
-  }
-
- protected:
-  virtual std::ostream& Stream() = 0;
-};
-
-class ARROW_EXPORT ArrowLog : public ArrowLogBase {
- public:
-  ArrowLog(const char* file_name, int line_number, ArrowLogLevel severity);
-  ~ArrowLog() override;
-
-  /// Return whether or not current logging instance is enabled.
-  ///
-  /// \return True if logging is enabled and false otherwise.
-  bool IsEnabled() const override;
-
-  /// The init function of arrow log for a program which should be called only once.
-  ///
-  /// \param appName The app name which starts the log.
-  /// \param severity_threshold Logging threshold for the program.
-  /// \param logDir Logging output file name. If empty, the log won't output to file.
-  static void StartArrowLog(const std::string& appName,
-                            ArrowLogLevel severity_threshold = ArrowLogLevel::ARROW_INFO,
-                            const std::string& logDir = "");
-
-  /// The shutdown function of arrow log, it should be used with StartArrowLog as a pair.
-  static void ShutDownArrowLog();
-
-  /// Install the failure signal handler to output call stack when crash.
-  /// If glog is not installed, this function won't do anything.
-  static void InstallFailureSignalHandler();
-
-  /// Uninstall the signal actions installed by InstallFailureSignalHandler.
-  static void UninstallSignalAction();
-
-  /// Return whether or not the log level is enabled in current setting.
-  ///
-  /// \param log_level The input log level to test.
-  /// \return True if input log level is not lower than the threshold.
-  static bool IsLevelEnabled(ArrowLogLevel log_level);
-
- private:
-  ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowLog);
-
-  // Hide the implementation of log provider by void *.
-  // Otherwise, lib user may define the same macro to use the correct header file.
-  void* logging_provider_;
-  /// True if log messages should be logged and false if they should be ignored.
-  bool is_enabled_;
-
-  static ArrowLogLevel severity_threshold_;
-
- protected:
-  std::ostream& Stream() override;
-};
-
-// This class make ARROW_CHECK compilation pass to change the << operator to void.
-// This class is copied from glog.
-class ARROW_EXPORT Voidify {
- public:
-  Voidify() {}
-  // This has to be an operator with a precedence lower than << but
-  // higher than ?:
-  void operator&(ArrowLogBase&) {}
-};
-
-namespace detail {
-
-/// @brief A helper for the nil log sink.
-///
-/// Using this helper is analogous to sending log messages to /dev/null:
-/// nothing gets logged.
-class NullLog {
- public:
-  /// The no-op output operator.
-  ///
-  /// @param [in] t
-  ///   The object to send into the nil sink.
-  /// @return Reference to the updated object.
-  template <class T>
-  NullLog& operator<<(const T& t) {
-    return *this;
-  }
-};
-
-}  // namespace detail
-}  // namespace util
-}  // namespace arrow
-
-#endif  // GANDIVA_IR
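The macros above expand to stream-style logging and assertions; a minimal sketch (`nbytes` and `status` are assumed caller-side variables):

ARROW_LOG(INFO) << "parquet adapter ready";
ARROW_CHECK_GE(nbytes, 0) << "negative byte count";
DCHECK_OK(status);  // per the CAUTION above, the argument is evaluated even in release builds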
- template - NullLog& operator<<(const T& t) { - return *this; - } -}; - -} // namespace detail -} // namespace util -} // namespace arrow - -#endif // GANDIVA_IR diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/macros.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/macros.h deleted file mode 100644 index b5675faa..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/macros.h +++ /dev/null @@ -1,191 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#define ARROW_EXPAND(x) x -#define ARROW_STRINGIFY(x) #x -#define ARROW_CONCAT(x, y) x##y - -// From Google gutil -#ifndef ARROW_DISALLOW_COPY_AND_ASSIGN -#define ARROW_DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&) = delete; \ - void operator=(const TypeName&) = delete -#endif - -#ifndef ARROW_DEFAULT_MOVE_AND_ASSIGN -#define ARROW_DEFAULT_MOVE_AND_ASSIGN(TypeName) \ - TypeName(TypeName&&) = default; \ - TypeName& operator=(TypeName&&) = default -#endif - -#define ARROW_UNUSED(x) (void)(x) -#define ARROW_ARG_UNUSED(x) -// -// GCC can be told that a certain branch is not likely to be taken (for -// instance, a CHECK failure), and use that information in static analysis. -// Giving it this information can help it optimize for the common case in -// the absence of better information (ie. -fprofile-arcs). -// -#if defined(__GNUC__) -#define ARROW_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0)) -#define ARROW_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) -#define ARROW_NORETURN __attribute__((noreturn)) -#define ARROW_NOINLINE __attribute__((noinline)) -#define ARROW_PREFETCH(addr) __builtin_prefetch(addr) -#elif defined(_MSC_VER) -#define ARROW_NORETURN __declspec(noreturn) -#define ARROW_NOINLINE __declspec(noinline) -#define ARROW_PREDICT_FALSE(x) (x) -#define ARROW_PREDICT_TRUE(x) (x) -#define ARROW_PREFETCH(addr) -#else -#define ARROW_NORETURN -#define ARROW_PREDICT_FALSE(x) (x) -#define ARROW_PREDICT_TRUE(x) (x) -#define ARROW_PREFETCH(addr) -#endif - -#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) -#define ARROW_RESTRICT __restrict -#else -#define ARROW_RESTRICT -#endif - -// ---------------------------------------------------------------------- -// C++/CLI support macros (see ARROW-1134) - -#ifndef NULLPTR - -#ifdef __cplusplus_cli -#define NULLPTR __nullptr -#else -#define NULLPTR nullptr -#endif - -#endif // ifndef NULLPTR - -// ---------------------------------------------------------------------- - -// clang-format off -// [[deprecated]] is only available in C++14, use this for the time being -// This macro takes an optional deprecation message -#ifdef __COVERITY__ -# define ARROW_DEPRECATED(...) -#else -# define ARROW_DEPRECATED(...) 
[[deprecated(__VA_ARGS__)]] -#endif - -#ifdef __COVERITY__ -# define ARROW_DEPRECATED_ENUM_VALUE(...) -#else -# define ARROW_DEPRECATED_ENUM_VALUE(...) [[deprecated(__VA_ARGS__)]] -#endif - -// clang-format on - -// Macros to disable deprecation warnings - -#ifdef __clang__ -#define ARROW_SUPPRESS_DEPRECATION_WARNING \ - _Pragma("clang diagnostic push"); \ - _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#define ARROW_UNSUPPRESS_DEPRECATION_WARNING _Pragma("clang diagnostic pop") -#elif defined(__GNUC__) -#define ARROW_SUPPRESS_DEPRECATION_WARNING \ - _Pragma("GCC diagnostic push"); \ - _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#define ARROW_UNSUPPRESS_DEPRECATION_WARNING _Pragma("GCC diagnostic pop") -#elif defined(_MSC_VER) -#define ARROW_SUPPRESS_DEPRECATION_WARNING \ - __pragma(warning(push)) __pragma(warning(disable : 4996)) -#define ARROW_UNSUPPRESS_DEPRECATION_WARNING __pragma(warning(pop)) -#else -#define ARROW_SUPPRESS_DEPRECATION_WARNING -#define ARROW_UNSUPPRESS_DEPRECATION_WARNING -#endif - -// ---------------------------------------------------------------------- - -// macros to disable padding -// these macros are portable across different compilers and platforms -//[https://github.com/google/flatbuffers/blob/master/include/flatbuffers/flatbuffers.h#L1355] -#if !defined(MANUALLY_ALIGNED_STRUCT) -#if defined(_MSC_VER) -#define MANUALLY_ALIGNED_STRUCT(alignment) \ - __pragma(pack(1)); \ - struct __declspec(align(alignment)) -#define STRUCT_END(name, size) \ - __pragma(pack()); \ - static_assert(sizeof(name) == size, "compiler breaks packing rules") -#elif defined(__GNUC__) || defined(__clang__) -#define MANUALLY_ALIGNED_STRUCT(alignment) \ - _Pragma("pack(1)") struct __attribute__((aligned(alignment))) -#define STRUCT_END(name, size) \ - _Pragma("pack()") static_assert(sizeof(name) == size, "compiler breaks packing rules") -#else -#error Unknown compiler, please define structure alignment macros -#endif -#endif // !defined(MANUALLY_ALIGNED_STRUCT) - -// ---------------------------------------------------------------------- -// Convenience macro disabling a particular UBSan check in a function - -#if defined(__clang__) -#define ARROW_DISABLE_UBSAN(feature) __attribute__((no_sanitize(feature))) -#else -#define ARROW_DISABLE_UBSAN(feature) -#endif - -// ---------------------------------------------------------------------- -// Machine information - -#if INTPTR_MAX == INT64_MAX -#define ARROW_BITNESS 64 -#elif INTPTR_MAX == INT32_MAX -#define ARROW_BITNESS 32 -#else -#error Unexpected INTPTR_MAX -#endif - -// ---------------------------------------------------------------------- -// From googletest -// (also in parquet-cpp) - -// When you need to test the private or protected members of a class, -// use the FRIEND_TEST macro to declare your tests as friends of the -// class. For example: -// -// class MyClass { -// private: -// void MyMethod(); -// FRIEND_TEST(MyClassTest, MyMethod); -// }; -// -// class MyClassTest : public testing::Test { -// // ... -// }; -// -// TEST_F(MyClassTest, MyMethod) { -// // Can call MyClass::MyMethod() here. 
-// }
-
-#define FRIEND_TEST(test_case_name, test_name) \
-  friend class test_case_name##_##test_name##_Test
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/map.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/map.h
deleted file mode 100644
index 55239090..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/map.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <utility>
-
-#include "arrow/result.h"
-
-namespace arrow {
-namespace internal {
-
-/// Helper providing single-lookup conditional insertion into std::map or
-/// std::unordered_map. If `key` exists in the container, an iterator to that pair
-/// will be returned. If `key` does not exist in the container, `gen(key)` will be
-/// invoked and its return value inserted.
-template <typename Map, typename Gen>
-auto GetOrInsertGenerated(Map* map, typename Map::key_type key, Gen&& gen)
-    -> decltype(map->begin()->second = gen(map->begin()->first), map->begin()) {
-  decltype(gen(map->begin()->first)) placeholder{};
-
-  auto it_success = map->emplace(std::move(key), std::move(placeholder));
-  if (it_success.second) {
-    // insertion of placeholder succeeded, overwrite it with gen()
-    const auto& inserted_key = it_success.first->first;
-    auto* value = &it_success.first->second;
-    *value = gen(inserted_key);
-  }
-  return it_success.first;
-}
-
-template <typename Map, typename Gen>
-auto GetOrInsertGenerated(Map* map, typename Map::key_type key, Gen&& gen)
-    -> Result<decltype(map->begin()->second = gen(map->begin()->first).ValueOrDie(),
-                       map->begin())> {
-  decltype(gen(map->begin()->first).ValueOrDie()) placeholder{};
-
-  auto it_success = map->emplace(std::move(key), std::move(placeholder));
-  if (it_success.second) {
-    // insertion of placeholder succeeded, overwrite it with gen()
-    const auto& inserted_key = it_success.first->first;
-    auto* value = &it_success.first->second;
-    ARROW_ASSIGN_OR_RAISE(*value, gen(inserted_key));
-  }
-  return it_success.first;
-}
-
-} // namespace internal
-} // namespace arrow
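As a usage illustration (hypothetical caller, not from the vendored sources), the non-Result overload above performs a single emplace and only invokes gen when the key was absent:

#include <map>
#include <string>

#include "arrow/util/map.h"

std::string Lookup(std::map<int, std::string>* cache, int key) {
  // single-lookup insert: the lambda runs only when `key` is not yet present
  auto it = arrow::internal::GetOrInsertGenerated(
      cache, key, [](const int& k) { return std::to_string(k); });
  return it->second;
}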
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/math_constants.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/math_constants.h
deleted file mode 100644
index 7ee87c5d..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/math_constants.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cmath>
-
-// Not provided by default in MSVC,
-// and _USE_MATH_DEFINES is not reliable with unity builds
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-#ifndef M_PI_2
-#define M_PI_2 1.57079632679489661923
-#endif
-#ifndef M_PI_4
-#define M_PI_4 0.785398163397448309616
-#endif
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/memory.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/memory.h
deleted file mode 100644
index 4250d069..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/memory.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-
-#include "arrow/util/macros.h"
-
-namespace arrow {
-namespace internal {
-
-// A helper function for doing memcpy with multiple threads. This is required
-// to saturate the memory bandwidth of modern cpus.
-void parallel_memcopy(uint8_t* dst, const uint8_t* src, int64_t nbytes,
-                      uintptr_t block_size, int num_threads);
-
-// A helper function for checking if two wrapped objects implementing `Equals`
-// are equal.
-template <typename T>
-bool SharedPtrEquals(const std::shared_ptr<T>& left, const std::shared_ptr<T>& right) {
-  if (left == right) return true;
-  if (left == NULLPTR || right == NULLPTR) return false;
-  return left->Equals(*right);
-}
-
-} // namespace internal
-} // namespace arrow
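A small sketch (hypothetical Point type) of the SharedPtrEquals contract above: pointer identity short-circuits, a single null side compares unequal, and otherwise the wrapped Equals decides.

#include <memory>

#include "arrow/util/memory.h"

struct Point {
  int x, y;
  bool Equals(const Point& other) const { return x == other.x && y == other.y; }
};

bool SamePoint(const std::shared_ptr<Point>& a, const std::shared_ptr<Point>& b) {
  return arrow::internal::SharedPtrEquals(a, b);
}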
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/mutex.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/mutex.h
deleted file mode 100644
index ac63cf70..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/mutex.h
+++ /dev/null
@@ -1,85 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace util {
-
-/// A wrapper around std::mutex since we can't use it directly in
-/// public headers due to C++/CLI.
-/// https://docs.microsoft.com/en-us/cpp/standard-library/mutex#remarks
-class ARROW_EXPORT Mutex {
- public:
-  Mutex();
-  Mutex(Mutex&&) = default;
-  Mutex& operator=(Mutex&&) = default;
-
-  /// A Guard is falsy if a lock could not be acquired.
-  class ARROW_EXPORT Guard {
-   public:
-    Guard() : locked_(NULLPTR, [](Mutex* mutex) {}) {}
-    Guard(Guard&&) = default;
-    Guard& operator=(Guard&&) = default;
-
-    explicit operator bool() const { return bool(locked_); }
-
-    void Unlock() { locked_.reset(); }
-
-   private:
-    explicit Guard(Mutex* locked);
-
-    std::unique_ptr<Mutex, void (*)(Mutex*)> locked_;
-    friend Mutex;
-  };
-
-  Guard TryLock();
-  Guard Lock();
-
- private:
-  struct Impl;
-  std::unique_ptr<Impl, void (*)(Impl*)> impl_;
-};
-
-#ifndef _WIN32
-/// Return a pointer to a process-wide, process-specific Mutex that can be used
-/// at any point in a child process. NULL is returned when called in the parent.
-///
-/// The rule is to first check that getpid() corresponds to the parent process pid
-/// and, if not, call this function to lock any after-fork reinitialization code.
-/// Like this:
-///
-///   std::atomic<pid_t> pid{getpid()};
-///   ...
-///   if (pid.load() != getpid()) {
-///     // In child process
-///     auto lock = GlobalForkSafeMutex()->Lock();
-///     if (pid.load() != getpid()) {
-///       // Reinitialize internal structures after fork
-///       ...
-///       pid.store(getpid());
-///     }
-///   }
-ARROW_EXPORT
-Mutex* GlobalForkSafeMutex();
-#endif
-
-} // namespace util
-} // namespace arrow
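Sketch of the falsy-Guard idiom described above (hypothetical counter example): TryLock returns a Guard that converts to false when the lock was not acquired, and the mutex is released when the Guard is destroyed.

#include "arrow/util/mutex.h"

arrow::util::Mutex mutex;
int counter = 0;

bool TryIncrement() {
  if (auto guard = mutex.TryLock()) {  // falsy Guard => lock not acquired
    ++counter;
    return true;  // mutex released when guard goes out of scope
  }
  return false;
}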
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/parallel.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/parallel.h
deleted file mode 100644
index 80f60fbd..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/parallel.h
+++ /dev/null
@@ -1,102 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <utility>
-#include <vector>
-
-#include "arrow/status.h"
-#include "arrow/util/functional.h"
-#include "arrow/util/thread_pool.h"
-#include "arrow/util/vector.h"
-
-namespace arrow {
-namespace internal {
-
-// A parallelizer that takes a `Status(int)` function and calls it with
-// arguments between 0 and `num_tasks - 1`, on an arbitrary number of threads.
-
-template <class FUNCTION, typename Executor>
-Status ParallelFor(int num_tasks, FUNCTION&& func,
-                   Executor* executor = internal::GetCpuThreadPool()) {
-  std::vector<Future<>> futures(num_tasks);
-
-  for (int i = 0; i < num_tasks; ++i) {
-    ARROW_ASSIGN_OR_RAISE(futures[i], executor->Submit(func, i));
-  }
-  auto st = Status::OK();
-  for (auto& fut : futures) {
-    st &= fut.status();
-  }
-  return st;
-}
-
-template <class FUNCTION, typename T,
-          typename R = typename internal::call_traits::return_type<FUNCTION>::ValueType,
-          typename Executor>
-Future<std::vector<R>> ParallelForAsync(
-    std::vector<T> inputs, FUNCTION&& func,
-    Executor* executor = internal::GetCpuThreadPool()) {
-  std::vector<Future<R>> futures(inputs.size());
-  for (size_t i = 0; i < inputs.size(); ++i) {
-    ARROW_ASSIGN_OR_RAISE(futures[i], executor->Submit(func, i, std::move(inputs[i])));
-  }
-  return All(std::move(futures))
-      .Then([](const std::vector<Result<R>>& results) -> Result<std::vector<R>> {
-        return UnwrapOrRaise(results);
-      });
-}
-
-// A parallelizer that takes a `Status(int)` function and calls it with
-// arguments between 0 and `num_tasks - 1`, in sequence or in parallel,
-// depending on the input boolean.
-
-template <class FUNCTION, typename Executor>
-Status OptionalParallelFor(bool use_threads, int num_tasks, FUNCTION&& func,
-                           Executor* executor = internal::GetCpuThreadPool()) {
-  if (use_threads) {
-    return ParallelFor(num_tasks, std::forward<FUNCTION>(func), executor);
-  } else {
-    for (int i = 0; i < num_tasks; ++i) {
-      RETURN_NOT_OK(func(i));
-    }
-    return Status::OK();
-  }
-}
-
-// A parallelizer that takes a `Result<R>(int index, T item)` function and
-// calls it with each item from the input array, in sequence or in parallel,
-// depending on the input boolean.
-
-template <class FUNCTION, typename T,
-          typename R = typename internal::call_traits::return_type<FUNCTION>::ValueType,
-          typename Executor>
-Future<std::vector<R>> OptionalParallelForAsync(
-    bool use_threads, std::vector<T> inputs, FUNCTION&& func,
-    Executor* executor = internal::GetCpuThreadPool()) {
-  if (use_threads) {
-    return ParallelForAsync(std::move(inputs), std::forward<FUNCTION>(func), executor);
-  } else {
-    std::vector<R> result(inputs.size());
-    for (size_t i = 0; i < inputs.size(); ++i) {
-      ARROW_ASSIGN_OR_RAISE(result[i], func(i, inputs[i]));
-    }
-    return result;
-  }
-}
-
-} // namespace internal
-} // namespace arrow
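Illustrative caller (hypothetical) for OptionalParallelFor above: the same Status(int) task body runs either on the CPU thread pool or sequentially, selected by the boolean.

#include <vector>

#include "arrow/status.h"
#include "arrow/util/parallel.h"

arrow::Status DoubleAll(std::vector<int>* values, bool use_threads) {
  return arrow::internal::OptionalParallelFor(
      use_threads, static_cast<int>(values->size()), [&](int i) {
        (*values)[i] *= 2;  // each index is visited exactly once
        return arrow::Status::OK();
      });
}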
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/pcg_random.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/pcg_random.h
deleted file mode 100644
index 768f2328..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/pcg_random.h
+++ /dev/null
@@ -1,33 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/vendored/pcg/pcg_random.hpp" // IWYU pragma: export
-
-namespace arrow {
-namespace random {
-
-using pcg32 = ::arrow_vendored::pcg32;
-using pcg64 = ::arrow_vendored::pcg64;
-using pcg32_fast = ::arrow_vendored::pcg32_fast;
-using pcg64_fast = ::arrow_vendored::pcg64_fast;
-using pcg32_oneseq = ::arrow_vendored::pcg32_oneseq;
-using pcg64_oneseq = ::arrow_vendored::pcg64_oneseq;
-
-} // namespace random
-} // namespace arrow
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/print.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/print.h
deleted file mode 100644
index 82cea473..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/print.h
+++ /dev/null
@@ -1,77 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <tuple>
-
-#include "arrow/util/string.h"
-
-using arrow::internal::ToChars;
-
-namespace arrow {
-namespace internal {
-
-namespace detail {
-
-template <typename OStream, typename Tuple, size_t N>
-struct TuplePrinter {
-  static void Print(OStream* os, const Tuple& t) {
-    TuplePrinter<OStream, Tuple, N - 1>::Print(os, t);
-    *os << std::get<N - 1>(t);
-  }
-};
-
-template <typename OStream, typename Tuple>
-struct TuplePrinter<OStream, Tuple, 0> {
-  static void Print(OStream* os, const Tuple& t) {}
-};
-
-} // namespace detail
-
-// Print elements from a tuple to a stream, in order.
-// Typical use is to pack a bunch of existing values with std::forward_as_tuple()
-// before passing it to this function.
-template <typename OStream, typename... Args>
-void PrintTuple(OStream* os, const std::tuple<Args...>& tup) {
-  detail::TuplePrinter<OStream, std::tuple<Args...>, sizeof...(Args)>::Print(os, tup);
-}
-
-template <typename Range, typename Separator>
-struct PrintVector {
-  const Range& range_;
-  const Separator& separator_;
-
-  template <typename Os> // template to dodge inclusion of <ostream>
-  friend Os& operator<<(Os& os, PrintVector l) {
-    bool first = true;
-    os << "[";
-    for (const auto& element : l.range_) {
-      if (first) {
-        first = false;
-      } else {
-        os << l.separator_;
-      }
-      os << ToChars(element); // use ToChars to avoid locale dependence
-    }
-    os << "]";
-    return os;
-  }
-};
-template <typename Range, typename Separator>
-PrintVector(const Range&, const Separator&) -> PrintVector<Range, Separator>;
-} // namespace internal
-} // namespace arrow
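A quick sketch (hypothetical Dump helper) of the PrintVector deduction guide above; per the comment in the header, ToChars keeps the output locale-independent.

#include <iostream>
#include <vector>

#include "arrow/util/print.h"

void Dump(const std::vector<int>& values) {
  // prints e.g. [1, 2, 3]
  std::cout << arrow::internal::PrintVector{values, ", "};
}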
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/queue.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/queue.h
deleted file mode 100644
index 6c71fa6e..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/queue.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/vendored/ProducerConsumerQueue.h"
-
-namespace arrow {
-namespace util {
-
-template <typename T>
-using SpscQueue = arrow_vendored::folly::ProducerConsumerQueue<T>;
-
-}
-} // namespace arrow
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/range.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/range.h
deleted file mode 100644
index 20553287..00000000
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/range.h
+++ /dev/null
@@ -1,258 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <iterator>
-#include <memory>
-#include <numeric>
-#include <tuple>
-#include <utility>
-#include <vector>
-
-namespace arrow::internal {
-
-/// Create a vector containing the values from start up to stop
-template <typename T>
-std::vector<T> Iota(T start, T stop) {
-  if (start > stop) {
-    return {};
-  }
-  std::vector<T> result(static_cast<size_t>(stop - start));
-  std::iota(result.begin(), result.end(), start);
-  return result;
-}
-
-/// Create a vector containing the values from 0 up to length
-template <typename T>
-std::vector<T> Iota(T length) {
-  return Iota(static_cast<T>(0), length);
-}
-
-/// Create a range from a callable which takes a single index parameter
-/// and returns the value of the iterator on each call, and a length.
-/// Only iterators obtained from the same range should be compared; the
-/// behaviour is generally similar to other STL containers.
-template -class LazyRange { - private: - // callable which generates the values - // has to be defined at the beginning of the class for type deduction - const Generator gen_; - // the length of the range - int64_t length_; -#ifdef _MSC_VER - // workaround to VS2010 not supporting decltype properly - // see https://stackoverflow.com/questions/21782846/decltype-for-class-member-function - static Generator gen_static_; -#endif - - public: -#ifdef _MSC_VER - using return_type = decltype(gen_static_(0)); -#else - using return_type = decltype(gen_(0)); -#endif - - /// Construct a new range from a callable and length - LazyRange(Generator gen, int64_t length) : gen_(gen), length_(length) {} - - // Class of the dependent iterator, created implicitly by begin and end - class RangeIter { - public: - using difference_type = int64_t; - using value_type = return_type; - using reference = const value_type&; - using pointer = const value_type*; - using iterator_category = std::forward_iterator_tag; - -#ifdef _MSC_VER - // msvc complains about unchecked iterators, - // see https://stackoverflow.com/questions/21655496/error-c4996-checked-iterators - using _Unchecked_type = typename LazyRange::RangeIter; -#endif - - RangeIter() = delete; - RangeIter(const RangeIter& other) = default; - RangeIter& operator=(const RangeIter& other) = default; - - RangeIter(const LazyRange& range, int64_t index) - : range_(&range), index_(index) {} - - const return_type operator*() const { return range_->gen_(index_); } - - RangeIter operator+(difference_type length) const { - return RangeIter(*range_, index_ + length); - } - - // pre-increment - RangeIter& operator++() { - ++index_; - return *this; - } - - // post-increment - RangeIter operator++(int) { - auto copy = RangeIter(*this); - ++index_; - return copy; - } - - bool operator==(const typename LazyRange::RangeIter& other) const { - return this->index_ == other.index_ && this->range_ == other.range_; - } - - bool operator!=(const typename LazyRange::RangeIter& other) const { - return this->index_ != other.index_ || this->range_ != other.range_; - } - - int64_t operator-(const typename LazyRange::RangeIter& other) const { - return this->index_ - other.index_; - } - - bool operator<(const typename LazyRange::RangeIter& other) const { - return this->index_ < other.index_; - } - - private: - // parent range reference - const LazyRange* range_; - // current index - int64_t index_; - }; - - friend class RangeIter; - - // Create a new begin const iterator - RangeIter begin() { return RangeIter(*this, 0); } - - // Create a new end const iterator - RangeIter end() { return RangeIter(*this, length_); } -}; - -/// Helper function to create a lazy range from a callable (e.g. lambda) and length -template -LazyRange MakeLazyRange(Generator&& gen, int64_t length) { - return LazyRange(std::forward(gen), length); -} - -/// \brief A helper for iterating multiple ranges simultaneously, similar to C++23's -/// zip() view adapter modelled after python's built-in zip() function. -/// -/// \code {.cpp} -/// const std::vector& tables = ... -/// std::function()> GetNames = ... -/// for (auto [table, name] : Zip(tables, GetNames())) { -/// static_assert(std::is_same_v); -/// static_assert(std::is_same_v); -/// // temporaries (like this vector of strings) are kept alive for the -/// // duration of a loop and are safely movable). -/// RegisterTableWithName(std::move(name), &table); -/// } -/// \endcode -/// -/// The zipped sequence ends as soon as any of its member ranges ends. 
-/// -/// Always use `auto` for the loop's declaration; it will always be a tuple -/// of references so for example using `const auto&` will compile but will -/// *look* like forcing const-ness even though the members of the tuple are -/// still mutable references. -/// -/// NOTE: we *could* make Zip a more full fledged range and enable things like -/// - gtest recognizing it as a container; it currently doesn't since Zip is -/// always mutable so this breaks: -/// EXPECT_THAT(Zip(std::vector{0}, std::vector{1}), -/// ElementsAre(std::tuple{0, 1})); -/// - letting it be random access when possible so we can do things like *sort* -/// parallel ranges -/// - ... -/// -/// However doing this will increase the compile time overhead of using Zip as -/// long as we're still using headers. Therefore until we can use c++20 modules: -/// *don't* extend Zip. -template -struct Zip; - -template -Zip(Ranges&&...) -> Zip, std::index_sequence_for>; - -template -struct Zip, std::index_sequence> { - explicit Zip(Ranges... ranges) : ranges_(std::forward(ranges)...) {} - - std::tuple ranges_; - - using sentinel = std::tuple(ranges_)))...>; - constexpr sentinel end() { return {std::end(std::get(ranges_))...}; } - - struct iterator : std::tuple(ranges_)))...> { - using std::tuple(ranges_)))...>::tuple; - - constexpr auto operator*() { - return std::tuple(*this))...>{*std::get(*this)...}; - } - - constexpr iterator& operator++() { - (++std::get(*this), ...); - return *this; - } - - constexpr bool operator!=(const sentinel& s) const { - bool all_iterators_valid = (... && (std::get(*this) != std::get(s))); - return all_iterators_valid; - } - }; - constexpr iterator begin() { return {std::begin(std::get(ranges_))...}; } -}; - -/// \brief A lazy sequence of integers which starts from 0 and never stops. -/// -/// This can be used in conjunction with Zip() to emulate python's built-in -/// enumerate() function: -/// -/// \code {.cpp} -/// const std::vector& tables = ... -/// for (auto [i, table] : Zip(Enumerate<>, tables)) { -/// std::cout << "#" << i << ": " << table.name() << std::endl; -/// } -/// \endcode -template -constexpr auto Enumerate = [] { - struct { - struct sentinel {}; - constexpr sentinel end() const { return {}; } - - struct iterator { - I value{0}; - - constexpr I operator*() { return value; } - - constexpr iterator& operator++() { - ++value; - return *this; - } - - constexpr std::true_type operator!=(sentinel) const { return {}; } - }; - constexpr iterator begin() const { return {}; } - } out; - - return out; -}(); - -} // namespace arrow::internal diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/ree_util.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/ree_util.h deleted file mode 100644 index a3e745ba..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/ree_util.h +++ /dev/null @@ -1,582 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include - -#include "arrow/array/data.h" -#include "arrow/type_traits.h" -#include "arrow/util/checked_cast.h" -#include "arrow/util/macros.h" - -namespace arrow { -namespace ree_util { - -/// \brief Get the child array holding the run ends from an REE array -inline const ArraySpan& RunEndsArray(const ArraySpan& span) { return span.child_data[0]; } - -/// \brief Get the child array holding the data values from an REE array -inline const ArraySpan& ValuesArray(const ArraySpan& span) { return span.child_data[1]; } - -/// \brief Get a pointer to run ends values of an REE array -template -const RunEndCType* RunEnds(const ArraySpan& span) { - assert(RunEndsArray(span).type->id() == CTypeTraits::ArrowType::type_id); - return RunEndsArray(span).GetValues(1); -} - -/// \brief Perform basic validations on the parameters of an REE array -/// and its two children arrays -/// -/// All the checks complete in O(1) time. Consequently, this function: -/// - DOES NOT check that run_ends is sorted and all-positive -/// - DOES NOT check the actual contents of the run_ends and values arrays -Status ValidateRunEndEncodedChildren(const RunEndEncodedType& type, - int64_t logical_length, - const std::shared_ptr& run_ends_data, - const std::shared_ptr& values_data, - int64_t null_count, int64_t logical_offset); - -/// \brief Compute the logical null count of an REE array -int64_t LogicalNullCount(const ArraySpan& span); - -namespace internal { - -/// \brief Uses binary-search to find the physical offset given a logical offset -/// and run-end values -/// -/// \return the physical offset or run_ends_size if the physical offset is not -/// found in run_ends -template -int64_t FindPhysicalIndex(const RunEndCType* run_ends, int64_t run_ends_size, int64_t i, - int64_t absolute_offset) { - assert(absolute_offset + i >= 0); - auto it = std::upper_bound(run_ends, run_ends + run_ends_size, absolute_offset + i); - int64_t result = std::distance(run_ends, it); - assert(result <= run_ends_size); - return result; -} - -/// \brief Uses binary-search to calculate the range of physical values (and -/// run-ends) necessary to represent the logical range of values from -/// offset to length -/// -/// \return a pair of physical offset and physical length -template -std::pair FindPhysicalRange(const RunEndCType* run_ends, - int64_t run_ends_size, int64_t length, - int64_t offset) { - const int64_t physical_offset = - FindPhysicalIndex(run_ends, run_ends_size, 0, offset); - // The physical length is calculated by finding the offset of the last element - // and adding 1 to it, so first we ensure there is at least one element. 
- if (length == 0) { - return {physical_offset, 0}; - } - const int64_t physical_index_of_last = FindPhysicalIndex( - run_ends + physical_offset, run_ends_size - physical_offset, length - 1, offset); - - assert(physical_index_of_last < run_ends_size - physical_offset); - return {physical_offset, physical_index_of_last + 1}; -} - -/// \brief Uses binary-search to calculate the number of physical values (and -/// run-ends) necessary to represent the logical range of values from -/// offset to length -template -int64_t FindPhysicalLength(const RunEndCType* run_ends, int64_t run_ends_size, - int64_t length, int64_t offset) { - auto [_, physical_length] = - FindPhysicalRange(run_ends, run_ends_size, length, offset); - // GH-37107: This is a workaround for GCC 7. GCC 7 doesn't ignore - // variables in structured binding automatically from unused - // variables when one of these variables are used. - // See also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81767 - ARROW_UNUSED(_); - return physical_length; -} - -/// \brief Find the physical index into the values array of the REE ArraySpan -/// -/// This function uses binary-search, so it has a O(log N) cost. -template -int64_t FindPhysicalIndex(const ArraySpan& span, int64_t i, int64_t absolute_offset) { - const int64_t run_ends_size = RunEndsArray(span).length; - return FindPhysicalIndex(RunEnds(span), run_ends_size, i, absolute_offset); -} - -/// \brief Find the physical length of an REE ArraySpan -/// -/// The physical length of an REE is the number of physical values (and -/// run-ends) necessary to represent the logical range of values from -/// offset to length. -/// -/// Avoid calling this function if the physical length can be established in -/// some other way (e.g. when iterating over the runs sequentially until the -/// end). This function uses binary-search, so it has a O(log N) cost. -template -int64_t FindPhysicalLength(const ArraySpan& span) { - return FindPhysicalLength( - /*run_ends=*/RunEnds(span), - /*run_ends_size=*/RunEndsArray(span).length, - /*length=*/span.length, - /*offset=*/span.offset); -} - -template -struct PhysicalIndexFinder; - -// non-inline implementations for each run-end type -ARROW_EXPORT int64_t FindPhysicalIndexImpl16(PhysicalIndexFinder& self, - int64_t i); -ARROW_EXPORT int64_t FindPhysicalIndexImpl32(PhysicalIndexFinder& self, - int64_t i); -ARROW_EXPORT int64_t FindPhysicalIndexImpl64(PhysicalIndexFinder& self, - int64_t i); - -/// \brief Stateful version of FindPhysicalIndex() that caches the result of -/// the previous search and uses it to optimize the next search. -/// -/// When new queries for the physical index of a logical index come in, -/// binary search is performed again but the first candidate checked is the -/// result of the previous search (cached physical index) instead of the -/// midpoint of the run-ends array. -/// -/// If that test fails, internal::FindPhysicalIndex() is called with one of the -/// partitions defined by the cached index. If the queried logical indices -/// follow an increasing or decreasing pattern, this first test is much more -/// effective in (1) finding the answer right away (close logical indices belong -/// to the same runs) or (2) discarding many more candidates than probing -/// the midpoint would. -/// -/// The most adversarial case (i.e. alternating between 0 and length-1 queries) -/// only adds one extra binary search probe when compared to always starting -/// binary search from the midpoint without any of these optimizations. 
-/// -/// \tparam RunEndCType The numeric type of the run-ends array. -template -struct PhysicalIndexFinder { - const ArraySpan array_span; - const RunEndCType* run_ends; - int64_t last_physical_index = 0; - - explicit PhysicalIndexFinder(const ArrayData& data) - : array_span(data), - run_ends(RunEndsArray(array_span).template GetValues(1)) { - assert(CTypeTraits::ArrowType::type_id == - ::arrow::internal::checked_cast(*data.type) - .run_end_type() - ->id()); - } - - /// \brief Find the physical index into the values array of the REE array. - /// - /// \pre 0 <= i < array_span.length() - /// \param i the logical index into the REE array - /// \return the physical index into the values array - int64_t FindPhysicalIndex(int64_t i) { - if constexpr (std::is_same_v) { - return FindPhysicalIndexImpl16(*this, i); - } else if constexpr (std::is_same_v) { - return FindPhysicalIndexImpl32(*this, i); - } else { - static_assert(std::is_same_v, "Unsupported RunEndCType."); - return FindPhysicalIndexImpl64(*this, i); - } - } -}; - -} // namespace internal - -/// \brief Find the physical index into the values array of the REE ArraySpan -/// -/// This function uses binary-search, so it has a O(log N) cost. -ARROW_EXPORT int64_t FindPhysicalIndex(const ArraySpan& span, int64_t i, - int64_t absolute_offset); - -/// \brief Find the physical length of an REE ArraySpan -/// -/// The physical length of an REE is the number of physical values (and -/// run-ends) necessary to represent the logical range of values from -/// offset to length. -/// -/// Avoid calling this function if the physical length can be established in -/// some other way (e.g. when iterating over the runs sequentially until the -/// end). This function uses binary-search, so it has a O(log N) cost. -ARROW_EXPORT int64_t FindPhysicalLength(const ArraySpan& span); - -/// \brief Find the physical range of physical values referenced by the REE in -/// the logical range from offset to offset + length -/// -/// \return a pair of physical offset and physical length -ARROW_EXPORT std::pair FindPhysicalRange(const ArraySpan& span, - int64_t offset, - int64_t length); - -// Publish PhysicalIndexFinder outside of the internal namespace. -template -using PhysicalIndexFinder = internal::PhysicalIndexFinder; - -template -class RunEndEncodedArraySpan { - private: - struct PrivateTag {}; - - public: - /// \brief Iterator representing the current run during iteration over a - /// run-end encoded array - class Iterator { - public: - Iterator(PrivateTag, const RunEndEncodedArraySpan& span, int64_t logical_pos, - int64_t physical_pos) - : span(span), logical_pos_(logical_pos), physical_pos_(physical_pos) {} - - /// \brief Return the physical index of the run - /// - /// The values array can be addressed with this index to get the value - /// that makes up the run. - /// - /// NOTE: if this Iterator is equal to RunEndEncodedArraySpan::end(), - /// the value returned is undefined. - int64_t index_into_array() const { return physical_pos_; } - - /// \brief Return the initial logical position of the run - /// - /// If this Iterator is equal to RunEndEncodedArraySpan::end(), this is - /// the same as RunEndEncodedArraySpan::length(). - int64_t logical_position() const { return logical_pos_; } - - /// \brief Return the logical position immediately after the run. - /// - /// Pre-condition: *this != RunEndEncodedArraySpan::end() - int64_t run_end() const { return span.run_end(physical_pos_); } - - /// \brief Returns the logical length of the run. 
- /// - /// Pre-condition: *this != RunEndEncodedArraySpan::end() - int64_t run_length() const { return run_end() - logical_pos_; } - - /// \brief Check if the iterator is at the end of the array. - /// - /// This can be used to avoid paying the cost of a call to - /// RunEndEncodedArraySpan::end(). - /// - /// \return true if the iterator is at the end of the array - bool is_end(const RunEndEncodedArraySpan& span) const { - return logical_pos_ >= span.length(); - } - - Iterator& operator++() { - logical_pos_ = span.run_end(physical_pos_); - physical_pos_ += 1; - return *this; - } - - Iterator operator++(int) { - const Iterator prev = *this; - ++(*this); - return prev; - } - - Iterator& operator--() { - physical_pos_ -= 1; - logical_pos_ = (physical_pos_ > 0) ? span.run_end(physical_pos_ - 1) : 0; - return *this; - } - - Iterator operator--(int) { - const Iterator prev = *this; - --(*this); - return prev; - } - - bool operator==(const Iterator& other) const { - return logical_pos_ == other.logical_pos_; - } - - bool operator!=(const Iterator& other) const { - return logical_pos_ != other.logical_pos_; - } - - public: - const RunEndEncodedArraySpan& span; - - private: - int64_t logical_pos_; - int64_t physical_pos_; - }; - - // Prevent implicit ArrayData -> ArraySpan conversion in - // RunEndEncodedArraySpan instantiation. - explicit RunEndEncodedArraySpan(const ArrayData& data) = delete; - - /// \brief Construct a RunEndEncodedArraySpan from an ArraySpan and new - /// absolute offset and length. - /// - /// RunEndEncodedArraySpan{span, off, len} is equivalent to: - /// - /// span.SetSlice(off, len); - /// RunEndEncodedArraySpan{span} - /// - /// ArraySpan::SetSlice() updates the null_count to kUnknownNullCount, but - /// we don't need that here as REE arrays have null_count set to 0 by - /// convention. - explicit RunEndEncodedArraySpan(const ArraySpan& array_span, int64_t offset, - int64_t length) - : array_span_{array_span}, - run_ends_(RunEnds(array_span_)), - length_(length), - offset_(offset) { - assert(array_span_.type->id() == Type::RUN_END_ENCODED); - } - - explicit RunEndEncodedArraySpan(const ArraySpan& array_span) - : RunEndEncodedArraySpan(array_span, array_span.offset, array_span.length) {} - - int64_t offset() const { return offset_; } - int64_t length() const { return length_; } - - int64_t PhysicalIndex(int64_t logical_pos) const { - return internal::FindPhysicalIndex(run_ends_, RunEndsArray(array_span_).length, - logical_pos, offset_); - } - - /// \brief Create an iterator from a logical position and its - /// pre-computed physical offset into the run ends array - /// - /// \param logical_pos is an index in the [0, length()] range - /// \param physical_offset the pre-calculated PhysicalIndex(logical_pos) - Iterator iterator(int64_t logical_pos, int64_t physical_offset) const { - return Iterator{PrivateTag{}, *this, logical_pos, physical_offset}; - } - - /// \brief Create an iterator from a logical position - /// - /// \param logical_pos is an index in the [0, length()] range - Iterator iterator(int64_t logical_pos) const { - if (logical_pos < length()) { - return iterator(logical_pos, PhysicalIndex(logical_pos)); - } - // If logical_pos is above the valid range, use length() as the logical - // position and calculate the physical address right after the last valid - // physical position. Which is the physical index of the last logical - // position, plus 1. - return (length() == 0) ? 
iterator(0, PhysicalIndex(0)) - : iterator(length(), PhysicalIndex(length() - 1) + 1); - } - - /// \brief Create an iterator representing the logical begin of the run-end - /// encoded array - Iterator begin() const { return iterator(0, PhysicalIndex(0)); } - - /// \brief Create an iterator representing the first invalid logical position - /// of the run-end encoded array - /// - /// \warning Avoid calling end() in a loop, as it will recompute the physical - /// length of the array on each call (O(log N) cost per call). - /// - /// \par You can write your loops like this instead: - /// \code - /// for (auto it = array.begin(), end = array.end(); it != end; ++it) { - /// // ... - /// } - /// \endcode - /// - /// \par Or this version that does not look like idiomatic C++, but removes - /// the need for calling end() completely: - /// \code - /// for (auto it = array.begin(); !it.is_end(array); ++it) { - /// // ... - /// } - /// \endcode - Iterator end() const { - return iterator(length(), - (length() == 0) ? PhysicalIndex(0) : PhysicalIndex(length() - 1) + 1); - } - - // Pre-condition: physical_pos < RunEndsArray(array_span_).length); - inline int64_t run_end(int64_t physical_pos) const { - assert(physical_pos < RunEndsArray(array_span_).length); - // Logical index of the end of the run at physical_pos with offset applied - const int64_t logical_run_end = - std::max(static_cast(run_ends_[physical_pos]) - offset(), 0); - // The current run may go further than the logical length, cap it - return std::min(logical_run_end, length()); - } - - private: - const ArraySpan& array_span_; - const RunEndCType* run_ends_; - const int64_t length_; - const int64_t offset_; -}; - -/// \brief Iterate over two run-end encoded arrays in runs or sub-runs that are -/// inside run boundaries on both inputs -/// -/// Both RunEndEncodedArraySpan should have the same logical length. Instances -/// of this iterator only hold references to the RunEndEncodedArraySpan inputs. -template -class MergedRunsIterator { - private: - using LeftIterator = typename Left::Iterator; - using RightIterator = typename Right::Iterator; - - MergedRunsIterator(LeftIterator left_it, RightIterator right_it, - int64_t common_logical_length, int64_t common_logical_pos) - : ree_iterators_{std::move(left_it), std::move(right_it)}, - logical_length_(common_logical_length), - logical_pos_(common_logical_pos) {} - - public: - /// \brief Construct a MergedRunsIterator positioned at logical position 0. 
- /// - /// Pre-condition: left.length() == right.length() - MergedRunsIterator(const Left& left, const Right& right) - : MergedRunsIterator(left.begin(), right.begin(), left.length(), 0) { - assert(left.length() == right.length()); - } - - static Result MakeBegin(const Left& left, const Right& right) { - if (left.length() != right.length()) { - return Status::Invalid( - "MergedRunsIterator expects RunEndEncodedArraySpans of the same length"); - } - return MergedRunsIterator(left, right); - } - - static Result MakeEnd(const Left& left, const Right& right) { - if (left.length() != right.length()) { - return Status::Invalid( - "MergedRunsIterator expects RunEndEncodedArraySpans of the same length"); - } - return MergedRunsIterator(left.end(), right.end(), left.length(), left.length()); - } - - /// \brief Return the left RunEndEncodedArraySpan child - const Left& left() const { return std::get<0>(ree_iterators_).span; } - - /// \brief Return the right RunEndEncodedArraySpan child - const Right& right() const { return std::get<1>(ree_iterators_).span; } - - /// \brief Return the initial logical position of the run - /// - /// If is_end(), this is the same as length(). - int64_t logical_position() const { return logical_pos_; } - - /// \brief Whether the iterator is at logical position 0. - bool is_begin() const { return logical_pos_ == 0; } - - /// \brief Whether the iterator has reached the end of both arrays - bool is_end() const { return logical_pos_ == logical_length_; } - - /// \brief Return the logical position immediately after the run. - /// - /// Pre-condition: !is_end() - int64_t run_end() const { - const auto& left_it = std::get<0>(ree_iterators_); - const auto& right_it = std::get<1>(ree_iterators_); - return std::min(left_it.run_end(), right_it.run_end()); - } - - /// \brief returns the logical length of the current run - /// - /// Pre-condition: !is_end() - int64_t run_length() const { return run_end() - logical_pos_; } - - /// \brief Return a physical index into the values array of a given input, - /// pointing to the value of the current run - template - int64_t index_into_array() const { - return std::get(ree_iterators_).index_into_array(); - } - - int64_t index_into_left_array() const { return index_into_array<0>(); } - int64_t index_into_right_array() const { return index_into_array<1>(); } - - MergedRunsIterator& operator++() { - auto& left_it = std::get<0>(ree_iterators_); - auto& right_it = std::get<1>(ree_iterators_); - - const int64_t left_run_end = left_it.run_end(); - const int64_t right_run_end = right_it.run_end(); - - if (left_run_end < right_run_end) { - logical_pos_ = left_run_end; - ++left_it; - } else if (left_run_end > right_run_end) { - logical_pos_ = right_run_end; - ++right_it; - } else { - logical_pos_ = left_run_end; - ++left_it; - ++right_it; - } - return *this; - } - - MergedRunsIterator operator++(int) { - MergedRunsIterator prev = *this; - ++(*this); - return prev; - } - - MergedRunsIterator& operator--() { - auto& left_it = std::get<0>(ree_iterators_); - auto& right_it = std::get<1>(ree_iterators_); - - // The logical position of each iterator is the run_end() of the previous run. 
- const int64_t left_logical_pos = left_it.logical_position(); - const int64_t right_logical_pos = right_it.logical_position(); - - if (left_logical_pos < right_logical_pos) { - --right_it; - logical_pos_ = std::max(left_logical_pos, right_it.logical_position()); - } else if (left_logical_pos > right_logical_pos) { - --left_it; - logical_pos_ = std::max(left_it.logical_position(), right_logical_pos); - } else { - --left_it; - --right_it; - logical_pos_ = std::max(left_it.logical_position(), right_it.logical_position()); - } - return *this; - } - - MergedRunsIterator operator--(int) { - MergedRunsIterator prev = *this; - --(*this); - return prev; - } - - bool operator==(const MergedRunsIterator& other) const { - return logical_pos_ == other.logical_position(); - } - - bool operator!=(const MergedRunsIterator& other) const { return !(*this == other); } - - private: - std::tuple ree_iterators_; - const int64_t logical_length_; - int64_t logical_pos_; -}; - -} // namespace ree_util -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/regex.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/regex.h deleted file mode 100644 index 590fbac7..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/regex.h +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include -#include - -#include "arrow/util/visibility.h" - -namespace arrow { -namespace internal { - -/// Match regex against target and produce string_views out of matches. -inline bool RegexMatch(const std::regex& regex, std::string_view target, - std::initializer_list out_matches) { - assert(regex.mark_count() == out_matches.size()); - - std::match_results match; - if (!std::regex_match(target.begin(), target.end(), match, regex)) { - return false; - } - - // Match #0 is the whole matched sequence - assert(regex.mark_count() + 1 == match.size()); - auto out_it = out_matches.begin(); - for (size_t i = 1; i < match.size(); ++i) { - **out_it++ = target.substr(match.position(i), match.length(i)); - } - return true; -} - -} // namespace internal -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/rle_encoding.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/rle_encoding.h deleted file mode 100644 index e0f56900..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/rle_encoding.h +++ /dev/null @@ -1,826 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Imported from Apache Impala (incubating) on 2016-01-29 and modified for use -// in parquet-cpp, Arrow - -#pragma once - -#include -#include -#include -#include - -#include "arrow/util/bit_block_counter.h" -#include "arrow/util/bit_run_reader.h" -#include "arrow/util/bit_stream_utils.h" -#include "arrow/util/bit_util.h" -#include "arrow/util/macros.h" - -namespace arrow { -namespace util { - -/// Utility classes to do run length encoding (RLE) for fixed bit width values. If runs -/// are sufficiently long, RLE is used, otherwise, the values are just bit-packed -/// (literal encoding). -/// For both types of runs, there is a byte-aligned indicator which encodes the length -/// of the run and the type of the run. -/// This encoding has the benefit that when there aren't any long enough runs, values -/// are always decoded at fixed (can be precomputed) bit offsets OR both the value and -/// the run length are byte aligned. This allows for very efficient decoding -/// implementations. -/// The encoding is: -/// encoded-block := run* -/// run := literal-run | repeated-run -/// literal-run := literal-indicator < literal bytes > -/// repeated-run := repeated-indicator < repeated value. padded to byte boundary > -/// literal-indicator := varint_encode( number_of_groups << 1 | 1) -/// repeated-indicator := varint_encode( number_of_repetitions << 1 ) -// -/// Each run is preceded by a varint. The varint's least significant bit is -/// used to indicate whether the run is a literal run or a repeated run. The rest -/// of the varint is used to determine the length of the run (eg how many times the -/// value repeats). -// -/// In the case of literal runs, the run length is always a multiple of 8 (i.e. encode -/// in groups of 8), so that no matter the bit-width of the value, the sequence will end -/// on a byte boundary without padding. -/// Given that we know it is a multiple of 8, we store the number of 8-groups rather than -/// the actual number of encoded ints. (This means that the total number of encoded values -/// cannot be determined from the encoded data, since the number of values in the last -/// group may not be a multiple of 8). For the last group of literal runs, we pad -/// the group to 8 with zeros. This allows for 8 at a time decoding on the read side -/// without the need for additional checks. -// -/// There is a break-even point when it is more storage efficient to do run length -/// encoding. For 1 bit-width values, that point is 8 values. They require 2 bytes -/// for both the repeated encoding or the literal encoding. This value can always -/// be computed based on the bit-width. -/// TODO: think about how to use this for strings. The bit packing isn't quite the same. 
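To make the indicator grammar above concrete, a small sketch (hypothetical helpers, not part of the vendored header; single-byte varints assumed for brevity) of how the two indicator bytes are formed:

#include <cstdint>

// repeated-indicator := varint_encode(number_of_repetitions << 1), LSB = 0
inline uint8_t RepeatedIndicator(uint8_t num_repetitions) {
  return static_cast<uint8_t>(num_repetitions << 1);
}

// literal-indicator := varint_encode(number_of_groups << 1 | 1), LSB = 1;
// number_of_groups counts groups of 8 values, so the run length is a multiple of 8
inline uint8_t LiteralIndicator(uint8_t num_groups_of_8) {
  return static_cast<uint8_t>((num_groups_of_8 << 1) | 1);
}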
-// -/// Examples with bit-width 1 (eg encoding booleans): -/// ---------------------------------------- -/// 100 1s followed by 100 0s: -/// <1, padded to 1 byte> <0, padded to 1 byte> -/// - (total 4 bytes) -// -/// alternating 1s and 0s (200 total): -/// 200 ints = 25 groups of 8 -/// <25 bytes of values, bitpacked> -/// (total 26 bytes, 1 byte overhead) -// - -/// Decoder class for RLE encoded data. -class RleDecoder { - public: - /// Create a decoder object. buffer/buffer_len is the decoded data. - /// bit_width is the width of each value (before encoding). - RleDecoder(const uint8_t* buffer, int buffer_len, int bit_width) - : bit_reader_(buffer, buffer_len), - bit_width_(bit_width), - current_value_(0), - repeat_count_(0), - literal_count_(0) { - DCHECK_GE(bit_width_, 0); - DCHECK_LE(bit_width_, 64); - } - - RleDecoder() : bit_width_(-1) {} - - void Reset(const uint8_t* buffer, int buffer_len, int bit_width) { - DCHECK_GE(bit_width, 0); - DCHECK_LE(bit_width, 64); - bit_reader_.Reset(buffer, buffer_len); - bit_width_ = bit_width; - current_value_ = 0; - repeat_count_ = 0; - literal_count_ = 0; - } - - /// Gets the next value. Returns false if there are no more. - template - bool Get(T* val); - - /// Gets a batch of values. Returns the number of decoded elements. - template - int GetBatch(T* values, int batch_size); - - /// Like GetBatch but add spacing for null entries - template - int GetBatchSpaced(int batch_size, int null_count, const uint8_t* valid_bits, - int64_t valid_bits_offset, T* out); - - /// Like GetBatch but the values are then decoded using the provided dictionary - template - int GetBatchWithDict(const T* dictionary, int32_t dictionary_length, T* values, - int batch_size); - - /// Like GetBatchWithDict but add spacing for null entries - /// - /// Null entries will be zero-initialized in `values` to avoid leaking - /// private data. - template - int GetBatchWithDictSpaced(const T* dictionary, int32_t dictionary_length, T* values, - int batch_size, int null_count, const uint8_t* valid_bits, - int64_t valid_bits_offset); - - protected: - ::arrow::bit_util::BitReader bit_reader_; - /// Number of bits needed to encode the value. Must be between 0 and 64. - int bit_width_; - uint64_t current_value_; - int32_t repeat_count_; - int32_t literal_count_; - - private: - /// Fills literal_count_ and repeat_count_ with next values. Returns false if there - /// are no more. - template - bool NextCounts(); - - /// Utility methods for retrieving spaced values. - template - int GetSpaced(Converter converter, int batch_size, int null_count, - const uint8_t* valid_bits, int64_t valid_bits_offset, T* out); -}; - -/// Class to incrementally build the rle data. This class does not allocate any memory. -/// The encoding has two modes: encoding repeated runs and literal runs. -/// If the run is sufficiently short, it is more efficient to encode as a literal run. -/// This class does so by buffering 8 values at a time. If they are not all the same -/// they are added to the literal run. If they are the same, they are added to the -/// repeated run. When we switch modes, the previous run is flushed out. -class RleEncoder { - public: - /// buffer/buffer_len: preallocated output buffer. - /// bit_width: max number of bits for value. - /// TODO: consider adding a min_repeated_run_length so the caller can control - /// when values should be encoded as repeated runs. Currently this is derived - /// based on the bit_width, which can determine a storage optimal choice. 
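A minimal decoding sketch against the RleDecoder interface declared above; the encoded buffer, its length, and the value count are hypothetical inputs:

#include <cstdint>
#include <vector>

#include "arrow/util/rle_encoding.h"

inline std::vector<int32_t> DecodeAll(const uint8_t* encoded, int encoded_len,
                                      int bit_width, int num_values) {
  arrow::util::RleDecoder decoder(encoded, encoded_len, bit_width);
  std::vector<int32_t> out(num_values);
  // GetBatch returns the number of values actually decoded; a short read
  // signals a truncated or malformed stream.
  int read = decoder.GetBatch(out.data(), num_values);
  out.resize(read);
  return out;
}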
- /// TODO: allow 0 bit_width (and have dict encoder use it) - RleEncoder(uint8_t* buffer, int buffer_len, int bit_width) - : bit_width_(bit_width), bit_writer_(buffer, buffer_len) { - DCHECK_GE(bit_width_, 0); - DCHECK_LE(bit_width_, 64); - max_run_byte_size_ = MinBufferSize(bit_width); - DCHECK_GE(buffer_len, max_run_byte_size_) << "Input buffer not big enough."; - Clear(); - } - - /// Returns the minimum buffer size needed to use the encoder for 'bit_width' - /// This is the maximum length of a single run for 'bit_width'. - /// It is not valid to pass a buffer less than this length. - static int MinBufferSize(int bit_width) { - /// 1 indicator byte and MAX_VALUES_PER_LITERAL_RUN 'bit_width' values. - int max_literal_run_size = 1 + static_cast(::arrow::bit_util::BytesForBits( - MAX_VALUES_PER_LITERAL_RUN * bit_width)); - /// Up to kMaxVlqByteLength indicator and a single 'bit_width' value. - int max_repeated_run_size = - ::arrow::bit_util::BitReader::kMaxVlqByteLength + - static_cast(::arrow::bit_util::BytesForBits(bit_width)); - return std::max(max_literal_run_size, max_repeated_run_size); - } - - /// Returns the maximum byte size it could take to encode 'num_values'. - static int MaxBufferSize(int bit_width, int num_values) { - // For a bit_width > 1, the worst case is the repetition of "literal run of length 8 - // and then a repeated run of length 8". - // 8 values per smallest run, 8 bits per byte - int bytes_per_run = bit_width; - int num_runs = static_cast(::arrow::bit_util::CeilDiv(num_values, 8)); - int literal_max_size = num_runs + num_runs * bytes_per_run; - - // In the very worst case scenario, the data is a concatenation of repeated - // runs of 8 values. Repeated run has a 1 byte varint followed by the - // bit-packed repeated value - int min_repeated_run_size = - 1 + static_cast(::arrow::bit_util::BytesForBits(bit_width)); - int repeated_max_size = num_runs * min_repeated_run_size; - - return std::max(literal_max_size, repeated_max_size); - } - - /// Encode value. Returns true if the value fits in buffer, false otherwise. - /// This value must be representable with bit_width_ bits. - bool Put(uint64_t value); - - /// Flushes any pending values to the underlying buffer. - /// Returns the total number of bytes written - int Flush(); - - /// Resets all the state in the encoder. - void Clear(); - - /// Returns pointer to underlying buffer - uint8_t* buffer() { return bit_writer_.buffer(); } - int32_t len() { return bit_writer_.bytes_written(); } - - private: - /// Flushes any buffered values. If this is part of a repeated run, this is largely - /// a no-op. - /// If it is part of a literal run, this will call FlushLiteralRun, which writes - /// out the buffered literal values. - /// If 'done' is true, the current run would be written even if it would normally - /// have been buffered more. This should only be called at the end, when the - /// encoder has received all values even if it would normally continue to be - /// buffered. - void FlushBufferedValues(bool done); - - /// Flushes literal values to the underlying buffer. If update_indicator_byte, - /// then the current literal run is complete and the indicator byte is updated. - void FlushLiteralRun(bool update_indicator_byte); - - /// Flushes a repeated run to the underlying buffer. - void FlushRepeatedRun(); - - /// Checks and sets buffer_full_. This must be called after flushing a run to - /// make sure there are enough bytes remaining to encode the next run. 
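Sizing the encoder's output buffer with the two helpers above, as a sketch: the constructor requires room for at least one maximum-length run, so the allocation takes the larger of MinBufferSize and MaxBufferSize:

#include <algorithm>
#include <cstdint>
#include <vector>

#include "arrow/util/rle_encoding.h"

inline std::vector<uint8_t> MakeEncoderBuffer(int bit_width, int num_values) {
  int len =
      std::max(arrow::util::RleEncoder::MinBufferSize(bit_width),
               arrow::util::RleEncoder::MaxBufferSize(bit_width, num_values));
  return std::vector<uint8_t>(static_cast<size_t>(len));
}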
- void CheckBufferFull(); - - /// The maximum number of values in a single literal run - /// (number of groups encodable by a 1-byte indicator * 8) - static const int MAX_VALUES_PER_LITERAL_RUN = (1 << 6) * 8; - - /// Number of bits needed to encode the value. Must be between 0 and 64. - const int bit_width_; - - /// Underlying buffer. - ::arrow::bit_util::BitWriter bit_writer_; - - /// If true, the buffer is full and subsequent Put()'s will fail. - bool buffer_full_; - - /// The maximum byte size a single run can take. - int max_run_byte_size_; - - /// We need to buffer at most 8 values for literals. This happens when the - /// bit_width is 1 (so 8 values fit in one byte). - /// TODO: generalize this to other bit widths - int64_t buffered_values_[8]; - - /// Number of values in buffered_values_ - int num_buffered_values_; - - /// The current (also last) value that was written and the count of how - /// many times in a row that value has been seen. This is maintained even - /// if we are in a literal run. If the repeat_count_ get high enough, we switch - /// to encoding repeated runs. - uint64_t current_value_; - int repeat_count_; - - /// Number of literals in the current run. This does not include the literals - /// that might be in buffered_values_. Only after we've got a group big enough - /// can we decide if they should part of the literal_count_ or repeat_count_ - int literal_count_; - - /// Pointer to a byte in the underlying buffer that stores the indicator byte. - /// This is reserved as soon as we need a literal run but the value is written - /// when the literal run is complete. - uint8_t* literal_indicator_byte_; -}; - -template -inline bool RleDecoder::Get(T* val) { - return GetBatch(val, 1) == 1; -} - -template -inline int RleDecoder::GetBatch(T* values, int batch_size) { - DCHECK_GE(bit_width_, 0); - int values_read = 0; - - auto* out = values; - - while (values_read < batch_size) { - int remaining = batch_size - values_read; - - if (repeat_count_ > 0) { // Repeated value case. - int repeat_batch = std::min(remaining, repeat_count_); - std::fill(out, out + repeat_batch, static_cast(current_value_)); - - repeat_count_ -= repeat_batch; - values_read += repeat_batch; - out += repeat_batch; - } else if (literal_count_ > 0) { - int literal_batch = std::min(remaining, literal_count_); - int actual_read = bit_reader_.GetBatch(bit_width_, out, literal_batch); - if (actual_read != literal_batch) { - return values_read; - } - - literal_count_ -= literal_batch; - values_read += literal_batch; - out += literal_batch; - } else { - if (!NextCounts()) return values_read; - } - } - - return values_read; -} - -template -inline int RleDecoder::GetSpaced(Converter converter, int batch_size, int null_count, - const uint8_t* valid_bits, int64_t valid_bits_offset, - T* out) { - if (ARROW_PREDICT_FALSE(null_count == batch_size)) { - converter.FillZero(out, out + batch_size); - return batch_size; - } - - DCHECK_GE(bit_width_, 0); - int values_read = 0; - int values_remaining = batch_size - null_count; - - // Assume no bits to start. 
- arrow::internal::BitRunReader bit_reader(valid_bits, valid_bits_offset, - /*length=*/batch_size); - arrow::internal::BitRun valid_run = bit_reader.NextRun(); - while (values_read < batch_size) { - if (ARROW_PREDICT_FALSE(valid_run.length == 0)) { - valid_run = bit_reader.NextRun(); - } - - DCHECK_GT(batch_size, 0); - DCHECK_GT(valid_run.length, 0); - - if (valid_run.set) { - if ((repeat_count_ == 0) && (literal_count_ == 0)) { - if (!NextCounts()) return values_read; - DCHECK((repeat_count_ > 0) ^ (literal_count_ > 0)); - } - - if (repeat_count_ > 0) { - int repeat_batch = 0; - // Consume the entire repeat counts incrementing repeat_batch to - // be the total of nulls + values consumed, we only need to - // get the total count because we can fill in the same value for - // nulls and non-nulls. This proves to be a big efficiency win. - while (repeat_count_ > 0 && (values_read + repeat_batch) < batch_size) { - DCHECK_GT(valid_run.length, 0); - if (valid_run.set) { - int update_size = std::min(static_cast(valid_run.length), repeat_count_); - repeat_count_ -= update_size; - repeat_batch += update_size; - valid_run.length -= update_size; - values_remaining -= update_size; - } else { - // We can consume all nulls here because we would do so on - // the next loop anyways. - repeat_batch += static_cast(valid_run.length); - valid_run.length = 0; - } - if (valid_run.length == 0) { - valid_run = bit_reader.NextRun(); - } - } - RunType current_value = static_cast(current_value_); - if (ARROW_PREDICT_FALSE(!converter.IsValid(current_value))) { - return values_read; - } - converter.Fill(out, out + repeat_batch, current_value); - out += repeat_batch; - values_read += repeat_batch; - } else if (literal_count_ > 0) { - int literal_batch = std::min(values_remaining, literal_count_); - DCHECK_GT(literal_batch, 0); - - // Decode the literals - constexpr int kBufferSize = 1024; - RunType indices[kBufferSize]; - literal_batch = std::min(literal_batch, kBufferSize); - int actual_read = bit_reader_.GetBatch(bit_width_, indices, literal_batch); - if (ARROW_PREDICT_FALSE(actual_read != literal_batch)) { - return values_read; - } - if (!converter.IsValid(indices, /*length=*/actual_read)) { - return values_read; - } - int skipped = 0; - int literals_read = 0; - while (literals_read < literal_batch) { - if (valid_run.set) { - int update_size = std::min(literal_batch - literals_read, - static_cast(valid_run.length)); - converter.Copy(out, indices + literals_read, update_size); - literals_read += update_size; - out += update_size; - valid_run.length -= update_size; - } else { - converter.FillZero(out, out + valid_run.length); - out += valid_run.length; - skipped += static_cast(valid_run.length); - valid_run.length = 0; - } - if (valid_run.length == 0) { - valid_run = bit_reader.NextRun(); - } - } - literal_count_ -= literal_batch; - values_remaining -= literal_batch; - values_read += literal_batch + skipped; - } - } else { - converter.FillZero(out, out + valid_run.length); - out += valid_run.length; - values_read += static_cast(valid_run.length); - valid_run.length = 0; - } - } - DCHECK_EQ(valid_run.length, 0); - DCHECK_EQ(values_remaining, 0); - return values_read; -} - -// Converter for GetSpaced that handles runs that get returned -// directly as output. 
-template -struct PlainRleConverter { - T kZero = {}; - inline bool IsValid(const T& values) const { return true; } - inline bool IsValid(const T* values, int32_t length) const { return true; } - inline void Fill(T* begin, T* end, const T& run_value) const { - std::fill(begin, end, run_value); - } - inline void FillZero(T* begin, T* end) { std::fill(begin, end, kZero); } - inline void Copy(T* out, const T* values, int length) const { - std::memcpy(out, values, length * sizeof(T)); - } -}; - -template -inline int RleDecoder::GetBatchSpaced(int batch_size, int null_count, - const uint8_t* valid_bits, - int64_t valid_bits_offset, T* out) { - if (null_count == 0) { - return GetBatch(out, batch_size); - } - - PlainRleConverter converter; - arrow::internal::BitBlockCounter block_counter(valid_bits, valid_bits_offset, - batch_size); - - int total_processed = 0; - int processed = 0; - arrow::internal::BitBlockCount block; - - do { - block = block_counter.NextFourWords(); - if (block.length == 0) { - break; - } - if (block.AllSet()) { - processed = GetBatch(out, block.length); - } else if (block.NoneSet()) { - converter.FillZero(out, out + block.length); - processed = block.length; - } else { - processed = GetSpaced>( - converter, block.length, block.length - block.popcount, valid_bits, - valid_bits_offset, out); - } - total_processed += processed; - out += block.length; - valid_bits_offset += block.length; - } while (processed == block.length); - return total_processed; -} - -static inline bool IndexInRange(int32_t idx, int32_t dictionary_length) { - return idx >= 0 && idx < dictionary_length; -} - -// Converter for GetSpaced that handles runs of returned dictionary -// indices. -template -struct DictionaryConverter { - T kZero = {}; - const T* dictionary; - int32_t dictionary_length; - - inline bool IsValid(int32_t value) { return IndexInRange(value, dictionary_length); } - - inline bool IsValid(const int32_t* values, int32_t length) const { - using IndexType = int32_t; - IndexType min_index = std::numeric_limits::max(); - IndexType max_index = std::numeric_limits::min(); - for (int x = 0; x < length; x++) { - min_index = std::min(values[x], min_index); - max_index = std::max(values[x], max_index); - } - - return IndexInRange(min_index, dictionary_length) && - IndexInRange(max_index, dictionary_length); - } - inline void Fill(T* begin, T* end, const int32_t& run_value) const { - std::fill(begin, end, dictionary[run_value]); - } - inline void FillZero(T* begin, T* end) { std::fill(begin, end, kZero); } - - inline void Copy(T* out, const int32_t* values, int length) const { - for (int x = 0; x < length; x++) { - out[x] = dictionary[values[x]]; - } - } -}; - -template -inline int RleDecoder::GetBatchWithDict(const T* dictionary, int32_t dictionary_length, - T* values, int batch_size) { - // Per https://github.com/apache/parquet-format/blob/master/Encodings.md, - // the maximum dictionary index width in Parquet is 32 bits. 
- using IndexType = int32_t; - DictionaryConverter converter; - converter.dictionary = dictionary; - converter.dictionary_length = dictionary_length; - - DCHECK_GE(bit_width_, 0); - int values_read = 0; - - auto* out = values; - - while (values_read < batch_size) { - int remaining = batch_size - values_read; - - if (repeat_count_ > 0) { - auto idx = static_cast(current_value_); - if (ARROW_PREDICT_FALSE(!IndexInRange(idx, dictionary_length))) { - return values_read; - } - T val = dictionary[idx]; - - int repeat_batch = std::min(remaining, repeat_count_); - std::fill(out, out + repeat_batch, val); - - /* Upkeep counters */ - repeat_count_ -= repeat_batch; - values_read += repeat_batch; - out += repeat_batch; - } else if (literal_count_ > 0) { - constexpr int kBufferSize = 1024; - IndexType indices[kBufferSize]; - - int literal_batch = std::min(remaining, literal_count_); - literal_batch = std::min(literal_batch, kBufferSize); - - int actual_read = bit_reader_.GetBatch(bit_width_, indices, literal_batch); - if (ARROW_PREDICT_FALSE(actual_read != literal_batch)) { - return values_read; - } - if (ARROW_PREDICT_FALSE(!converter.IsValid(indices, /*length=*/literal_batch))) { - return values_read; - } - converter.Copy(out, indices, literal_batch); - - /* Upkeep counters */ - literal_count_ -= literal_batch; - values_read += literal_batch; - out += literal_batch; - } else { - if (!NextCounts()) return values_read; - } - } - - return values_read; -} - -template -inline int RleDecoder::GetBatchWithDictSpaced(const T* dictionary, - int32_t dictionary_length, T* out, - int batch_size, int null_count, - const uint8_t* valid_bits, - int64_t valid_bits_offset) { - if (null_count == 0) { - return GetBatchWithDict(dictionary, dictionary_length, out, batch_size); - } - arrow::internal::BitBlockCounter block_counter(valid_bits, valid_bits_offset, - batch_size); - using IndexType = int32_t; - DictionaryConverter converter; - converter.dictionary = dictionary; - converter.dictionary_length = dictionary_length; - - int total_processed = 0; - int processed = 0; - arrow::internal::BitBlockCount block; - do { - block = block_counter.NextFourWords(); - if (block.length == 0) { - break; - } - if (block.AllSet()) { - processed = GetBatchWithDict(dictionary, dictionary_length, out, block.length); - } else if (block.NoneSet()) { - converter.FillZero(out, out + block.length); - processed = block.length; - } else { - processed = GetSpaced>( - converter, block.length, block.length - block.popcount, valid_bits, - valid_bits_offset, out); - } - total_processed += processed; - out += block.length; - valid_bits_offset += block.length; - } while (processed == block.length); - return total_processed; -} - -template -bool RleDecoder::NextCounts() { - // Read the next run's indicator int, it could be a literal or repeated run. - // The int is encoded as a vlq-encoded value. 
- uint32_t indicator_value = 0; - if (!bit_reader_.GetVlqInt(&indicator_value)) return false; - - // lsb indicates if it is a literal run or repeated run - bool is_literal = indicator_value & 1; - uint32_t count = indicator_value >> 1; - if (is_literal) { - if (ARROW_PREDICT_FALSE(count == 0 || count > static_cast(INT32_MAX) / 8)) { - return false; - } - literal_count_ = count * 8; - } else { - if (ARROW_PREDICT_FALSE(count == 0 || count > static_cast(INT32_MAX))) { - return false; - } - repeat_count_ = count; - T value = {}; - if (!bit_reader_.GetAligned( - static_cast(::arrow::bit_util::CeilDiv(bit_width_, 8)), &value)) { - return false; - } - current_value_ = static_cast(value); - } - return true; -} - -/// This function buffers input values 8 at a time. After seeing all 8 values, -/// it decides whether they should be encoded as a literal or repeated run. -inline bool RleEncoder::Put(uint64_t value) { - DCHECK(bit_width_ == 64 || value < (1ULL << bit_width_)); - if (ARROW_PREDICT_FALSE(buffer_full_)) return false; - - if (ARROW_PREDICT_TRUE(current_value_ == value)) { - ++repeat_count_; - if (repeat_count_ > 8) { - // This is just a continuation of the current run, no need to buffer the - // values. - // Note that this is the fast path for long repeated runs. - return true; - } - } else { - if (repeat_count_ >= 8) { - // We had a run that was long enough but it has ended. Flush the - // current repeated run. - DCHECK_EQ(literal_count_, 0); - FlushRepeatedRun(); - } - repeat_count_ = 1; - current_value_ = value; - } - - buffered_values_[num_buffered_values_] = value; - if (++num_buffered_values_ == 8) { - DCHECK_EQ(literal_count_ % 8, 0); - FlushBufferedValues(false); - } - return true; -} - -inline void RleEncoder::FlushLiteralRun(bool update_indicator_byte) { - if (literal_indicator_byte_ == NULL) { - // The literal indicator byte has not been reserved yet, get one now. - literal_indicator_byte_ = bit_writer_.GetNextBytePtr(); - DCHECK(literal_indicator_byte_ != NULL); - } - - // Write all the buffered values as bit packed literals - for (int i = 0; i < num_buffered_values_; ++i) { - bool success = bit_writer_.PutValue(buffered_values_[i], bit_width_); - DCHECK(success) << "There is a bug in using CheckBufferFull()"; - } - num_buffered_values_ = 0; - - if (update_indicator_byte) { - // At this point we need to write the indicator byte for the literal run. - // We only reserve one byte, to allow for streaming writes of literal values. - // The logic makes sure we flush literal runs often enough to not overrun - // the 1 byte. - DCHECK_EQ(literal_count_ % 8, 0); - int num_groups = literal_count_ / 8; - int32_t indicator_value = (num_groups << 1) | 1; - DCHECK_EQ(indicator_value & 0xFFFFFF00, 0); - *literal_indicator_byte_ = static_cast(indicator_value); - literal_indicator_byte_ = NULL; - literal_count_ = 0; - CheckBufferFull(); - } -} - -inline void RleEncoder::FlushRepeatedRun() { - DCHECK_GT(repeat_count_, 0); - bool result = true; - // The lsb of 0 indicates this is a repeated run - int32_t indicator_value = repeat_count_ << 1 | 0; - result &= bit_writer_.PutVlqInt(static_cast(indicator_value)); - result &= bit_writer_.PutAligned( - current_value_, static_cast(::arrow::bit_util::CeilDiv(bit_width_, 8))); - DCHECK(result); - num_buffered_values_ = 0; - repeat_count_ = 0; - CheckBufferFull(); -} - -/// Flush the values that have been buffered. At this point we decide whether -/// we need to switch between the run types or continue the current one. 
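A usage sketch for GetBatchWithDict above (GetBatchWithDictSpaced has the same shape plus a validity bitmap); the dictionary and encoded stream are hypothetical inputs:

#include <cstdint>
#include <vector>

#include "arrow/util/rle_encoding.h"

inline std::vector<double> DecodeWithDictionary(
    const uint8_t* encoded, int encoded_len, int bit_width,
    const double* dictionary, int32_t dictionary_length, int num_values) {
  arrow::util::RleDecoder decoder(encoded, encoded_len, bit_width);
  std::vector<double> out(num_values);
  // Decoding stops early on a malformed stream or an out-of-range index.
  int read = decoder.GetBatchWithDict(dictionary, dictionary_length,
                                      out.data(), num_values);
  out.resize(read);
  return out;
}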
-inline void RleEncoder::FlushBufferedValues(bool done) { - if (repeat_count_ >= 8) { - // Clear the buffered values. They are part of the repeated run now and we - // don't want to flush them out as literals. - num_buffered_values_ = 0; - if (literal_count_ != 0) { - // There was a current literal run. All the values in it have been flushed - // but we still need to update the indicator byte. - DCHECK_EQ(literal_count_ % 8, 0); - DCHECK_EQ(repeat_count_, 8); - FlushLiteralRun(true); - } - DCHECK_EQ(literal_count_, 0); - return; - } - - literal_count_ += num_buffered_values_; - DCHECK_EQ(literal_count_ % 8, 0); - int num_groups = literal_count_ / 8; - if (num_groups + 1 >= (1 << 6)) { - // We need to start a new literal run because the indicator byte we've reserved - // cannot store more values. - DCHECK(literal_indicator_byte_ != NULL); - FlushLiteralRun(true); - } else { - FlushLiteralRun(done); - } - repeat_count_ = 0; -} - -inline int RleEncoder::Flush() { - if (literal_count_ > 0 || repeat_count_ > 0 || num_buffered_values_ > 0) { - bool all_repeat = literal_count_ == 0 && (repeat_count_ == num_buffered_values_ || - num_buffered_values_ == 0); - // There is something pending, figure out if it's a repeated or literal run - if (repeat_count_ > 0 && all_repeat) { - FlushRepeatedRun(); - } else { - DCHECK_EQ(literal_count_ % 8, 0); - // Buffer the last group of literals to 8 by padding with 0s. - for (; num_buffered_values_ != 0 && num_buffered_values_ < 8; - ++num_buffered_values_) { - buffered_values_[num_buffered_values_] = 0; - } - literal_count_ += num_buffered_values_; - FlushLiteralRun(true); - repeat_count_ = 0; - } - } - bit_writer_.Flush(); - DCHECK_EQ(num_buffered_values_, 0); - DCHECK_EQ(literal_count_, 0); - DCHECK_EQ(repeat_count_, 0); - - return bit_writer_.bytes_written(); -} - -inline void RleEncoder::CheckBufferFull() { - int bytes_written = bit_writer_.bytes_written(); - if (bytes_written + max_run_byte_size_ > bit_writer_.buffer_len()) { - buffer_full_ = true; - } -} - -inline void RleEncoder::Clear() { - buffer_full_ = false; - current_value_ = 0; - repeat_count_ = 0; - num_buffered_values_ = 0; - literal_count_ = 0; - literal_indicator_byte_ = NULL; - bit_writer_.Clear(); -} - -} // namespace util -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/rows_to_batches.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/rows_to_batches.h deleted file mode 100644 index 8ad254df..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/rows_to_batches.h +++ /dev/null @@ -1,163 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
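Tying the rle_encoding.h pieces above together, an end-to-end round-trip sketch; the values and bit width are arbitrary illustrations:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

#include "arrow/util/rle_encoding.h"

inline void RleRoundTrip() {
  const int bit_width = 3;  // values must fit in [0, 8)
  const std::vector<uint64_t> values = {5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 2, 3};

  std::vector<uint8_t> buf(static_cast<size_t>(
      std::max(arrow::util::RleEncoder::MinBufferSize(bit_width),
               arrow::util::RleEncoder::MaxBufferSize(
                   bit_width, static_cast<int>(values.size())))));
  arrow::util::RleEncoder encoder(buf.data(), static_cast<int>(buf.size()),
                                  bit_width);
  for (uint64_t v : values) {
    bool fit = encoder.Put(v);  // nine 5s become a repeated run
    assert(fit);
  }
  int encoded_len = encoder.Flush();  // trailing literals are padded to 8

  arrow::util::RleDecoder decoder(buf.data(), encoded_len, bit_width);
  std::vector<uint64_t> decoded(values.size());
  int read = decoder.GetBatch(decoded.data(), static_cast<int>(decoded.size()));
  assert(read == static_cast<int>(values.size()));
  assert(decoded == values);
}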
- -#pragma once - -#include "arrow/record_batch.h" -#include "arrow/result.h" -#include "arrow/status.h" -#include "arrow/table_builder.h" -#include "arrow/util/iterator.h" - -#include - -namespace arrow::util { - -namespace detail { - -// Default identity function row accessor. Used to for the common case where the value -// of each row iterated over is it's self also directly iterable. -[[nodiscard]] constexpr inline auto MakeDefaultRowAccessor() { - return [](auto& x) -> Result { return std::ref(x); }; -} - -// Meta-function to check if a type `T` is a range (iterable using `std::begin()` / -// `std::end()`). `is_range::value` will be false if `T` is not a valid range. -template -struct is_range : std::false_type {}; - -template -struct is_range())), - decltype(std::end(std::declval()))>> : std::true_type { -}; - -} // namespace detail - -/// Delete overload for `const Range&& rows` because the data's lifetime must exceed -/// the lifetime of the function call. `data` will be read when client uses the -/// `RecordBatchReader` -template -[[nodiscard]] typename std::enable_if_t::value, - Result>> -/* Result>> */ RowsToBatches( - const std::shared_ptr& schema, const Range&& rows, - DataPointConvertor&& data_point_convertor, - RowAccessor&& row_accessor = detail::MakeDefaultRowAccessor(), - MemoryPool* pool = default_memory_pool(), - const std::size_t batch_size = 1024) = delete; - -/// \brief Utility function for converting any row-based structure into an -/// `arrow::RecordBatchReader` (this can be easily converted to an `arrow::Table` using -/// `arrow::RecordBatchReader::ToTable()`). -/// -/// Examples of supported types: -/// - `std::vector>>` -/// - `std::vector` - -/// If `rows` (client’s row-based structure) is not a valid C++ range, the client will -/// need to either make it iterable, or make an adapter/wrapper that is a valid C++ -/// range. - -/// The client must provide a `DataPointConvertor` callable type that will convert the -/// structure’s data points into the corresponding arrow types. - -/// Complex nested rows can be supported by providing a custom `row_accessor` instead -/// of the default. - -/// Example usage: -/// \code{.cpp} -/// auto IntConvertor = [](ArrayBuilder& array_builder, int value) { -/// return static_cast(array_builder).Append(value); -/// }; -/// std::vector> data = {{1, 2, 4}, {5, 6, 7}}; -/// auto batches = RowsToBatches(kTestSchema, data, IntConvertor); -/// \endcode - -/// \param[in] schema - The schema to be used in the `RecordBatchReader` - -/// \param[in] rows - Iterable row-based structure that will be converted to arrow -/// batches - -/// \param[in] data_point_convertor - Client provided callable type that will convert -/// the structure’s data points into the corresponding arrow types. The convertor must -/// return an error `Status` if an error happens during conversion. - -/// \param[in] row_accessor - In the common case where the value of each row iterated -/// over is it's self also directly iterable, the client can just use the default. -/// The provided callable must take the values of the `rows` range and return a -/// `std::reference_wrapper` to the data points in a given row. The data points -/// must be in order of their corresponding fields in the schema. -/// see: /ref `MakeDefaultRowAccessor` - -/// \param[in] pool - The MemoryPool to use for allocations. - -/// \param[in] batch_size - Number of rows to insert into each RecordBatch. 
- -/// \return `Result>>` result will be a -/// `std::shared_ptr>` if not errors occurred, else an error status. -template -[[nodiscard]] typename std::enable_if_t::value, - Result>> -/* Result>> */ RowsToBatches( - const std::shared_ptr& schema, const Range& rows, - DataPointConvertor&& data_point_convertor, - RowAccessor&& row_accessor = detail::MakeDefaultRowAccessor(), - MemoryPool* pool = default_memory_pool(), const std::size_t batch_size = 1024) { - auto make_next_batch = - [pool = pool, batch_size = batch_size, rows_ittr = std::begin(rows), - rows_ittr_end = std::end(rows), schema = schema, - row_accessor = std::forward(row_accessor), - data_point_convertor = std::forward( - data_point_convertor)]() mutable -> Result> { - if (rows_ittr == rows_ittr_end) return NULLPTR; - - ARROW_ASSIGN_OR_RAISE(auto record_batch_builder, - RecordBatchBuilder::Make(schema, pool, batch_size)); - - for (size_t i = 0; i < batch_size && (rows_ittr != rows_ittr_end); - i++, std::advance(rows_ittr, 1)) { - int col_index = 0; - ARROW_ASSIGN_OR_RAISE(const auto row, row_accessor(*rows_ittr)); - - // If the accessor returns a `std::reference_wrapper` unwrap if - const auto& row_unwrapped = [&]() { - if constexpr (detail::is_range::value) - return row; - else - return row.get(); - }(); - - for (auto& data_point : row_unwrapped) { - ArrayBuilder* array_builder = record_batch_builder->GetField(col_index); - ARROW_RETURN_IF(array_builder == NULLPTR, - Status::Invalid("array_builder == NULLPTR")); - - ARROW_RETURN_NOT_OK(data_point_convertor(*array_builder, data_point)); - col_index++; - } - } - - ARROW_ASSIGN_OR_RAISE(auto result, record_batch_builder->Flush()); - return result; - }; - return RecordBatchReader::MakeFromIterator(MakeFunctionIterator(make_next_batch), - schema); -} - -} // namespace arrow::util diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/simd.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/simd.h deleted file mode 100644 index ee9105d5..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/simd.h +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
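Building on the IntConvertor example in the RowsToBatches doc comment above, a fuller sketch that materializes an arrow::Table; the schema and field names are illustrative assumptions:

#include <memory>
#include <vector>

#include "arrow/api.h"
#include "arrow/util/rows_to_batches.h"

inline arrow::Result<std::shared_ptr<arrow::Table>> VectorsToTable(
    const std::vector<std::vector<int>>& rows) {
  // Each inner vector supplies one value per schema field, in order.
  auto schema = arrow::schema({arrow::field("x", arrow::int32()),
                               arrow::field("y", arrow::int32()),
                               arrow::field("z", arrow::int32())});
  auto int_convertor = [](arrow::ArrayBuilder& builder, int value) {
    return static_cast<arrow::Int32Builder&>(builder).Append(value);
  };
  ARROW_ASSIGN_OR_RAISE(
      auto reader, arrow::util::RowsToBatches(schema, rows, int_convertor));
  return reader->ToTable();
}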
- -#pragma once - -#ifdef _MSC_VER -// MSVC x86_64/arm64 - -#if defined(_M_AMD64) || defined(_M_X64) -#include -#endif - -#else -// gcc/clang (possibly others) - -#if defined(ARROW_HAVE_BMI2) -#include -#endif - -#if defined(ARROW_HAVE_AVX2) || defined(ARROW_HAVE_AVX512) -#include -#elif defined(ARROW_HAVE_SSE4_2) -#include -#endif - -#ifdef ARROW_HAVE_NEON -#include -#endif - -#endif diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/small_vector.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/small_vector.h deleted file mode 100644 index 52e191c4..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/small_vector.h +++ /dev/null @@ -1,511 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "arrow/util/aligned_storage.h" -#include "arrow/util/macros.h" - -namespace arrow { -namespace internal { - -template -struct StaticVectorStorageBase { - using storage_type = AlignedStorage; - - storage_type static_data_[N]; - size_t size_ = 0; - - void destroy() noexcept {} -}; - -template -struct StaticVectorStorageBase { - using storage_type = AlignedStorage; - - storage_type static_data_[N]; - size_t size_ = 0; - - ~StaticVectorStorageBase() noexcept { destroy(); } - - void destroy() noexcept { storage_type::destroy_several(static_data_, size_); } -}; - -template ::value> -struct StaticVectorStorage : public StaticVectorStorageBase { - using Base = StaticVectorStorageBase; - using typename Base::storage_type; - - using Base::size_; - using Base::static_data_; - - StaticVectorStorage() noexcept = default; - - constexpr storage_type* storage_ptr() { return static_data_; } - - constexpr const storage_type* const_storage_ptr() const { return static_data_; } - - // Adjust storage size, but don't initialize any objects - void bump_size(size_t addend) { - assert(size_ + addend <= N); - size_ += addend; - } - - void ensure_capacity(size_t min_capacity) { assert(min_capacity <= N); } - - // Adjust storage size, but don't destroy any objects - void reduce_size(size_t reduce_by) { - assert(reduce_by <= size_); - size_ -= reduce_by; - } - - // Move objects from another storage, but don't destroy any objects currently - // stored in *this. - // You need to call destroy() first if necessary (e.g. in a - // move assignment operator). 
- void move_construct(StaticVectorStorage&& other) noexcept { - size_ = other.size_; - if (size_ != 0) { - // Use a compile-time memcpy size (N) for trivial types - storage_type::move_construct_several(other.static_data_, static_data_, size_, N); - } - } - - constexpr size_t capacity() const { return N; } - - constexpr size_t max_size() const { return N; } - - void reserve(size_t n) {} - - void clear() { - storage_type::destroy_several(static_data_, size_); - size_ = 0; - } -}; - -template -struct SmallVectorStorage { - using storage_type = AlignedStorage; - - storage_type static_data_[N]; - size_t size_ = 0; - storage_type* data_ = static_data_; - size_t dynamic_capacity_ = 0; - - SmallVectorStorage() noexcept = default; - - ~SmallVectorStorage() { destroy(); } - - constexpr storage_type* storage_ptr() { return data_; } - - constexpr const storage_type* const_storage_ptr() const { return data_; } - - void bump_size(size_t addend) { - const size_t new_size = size_ + addend; - ensure_capacity(new_size); - size_ = new_size; - } - - void ensure_capacity(size_t min_capacity) { - if (dynamic_capacity_) { - // Grow dynamic storage if necessary - if (min_capacity > dynamic_capacity_) { - size_t new_capacity = std::max(dynamic_capacity_ * 2, min_capacity); - reallocate_dynamic(new_capacity); - } - } else if (min_capacity > N) { - switch_to_dynamic(min_capacity); - } - } - - void reduce_size(size_t reduce_by) { - assert(reduce_by <= size_); - size_ -= reduce_by; - } - - void destroy() noexcept { - storage_type::destroy_several(data_, size_); - if (dynamic_capacity_) { - delete[] data_; - } - } - - void move_construct(SmallVectorStorage&& other) noexcept { - size_ = other.size_; - dynamic_capacity_ = other.dynamic_capacity_; - if (dynamic_capacity_) { - data_ = other.data_; - other.data_ = other.static_data_; - other.dynamic_capacity_ = 0; - other.size_ = 0; - } else if (size_ != 0) { - // Use a compile-time memcpy size (N) for trivial types - storage_type::move_construct_several(other.static_data_, static_data_, size_, N); - } - } - - constexpr size_t capacity() const { return dynamic_capacity_ ? 
dynamic_capacity_ : N; } - - constexpr size_t max_size() const { return std::numeric_limits::max(); } - - void reserve(size_t n) { - if (dynamic_capacity_) { - if (n > dynamic_capacity_) { - reallocate_dynamic(n); - } - } else if (n > N) { - switch_to_dynamic(n); - } - } - - void clear() { - storage_type::destroy_several(data_, size_); - size_ = 0; - } - - private: - void switch_to_dynamic(size_t new_capacity) { - dynamic_capacity_ = new_capacity; - data_ = new storage_type[new_capacity]; - storage_type::move_construct_several_and_destroy_source(static_data_, data_, size_); - } - - void reallocate_dynamic(size_t new_capacity) { - assert(new_capacity >= size_); - auto new_data = new storage_type[new_capacity]; - storage_type::move_construct_several_and_destroy_source(data_, new_data, size_); - delete[] data_; - dynamic_capacity_ = new_capacity; - data_ = new_data; - } -}; - -template -class StaticVectorImpl { - private: - Storage storage_; - - T* data_ptr() { return storage_.storage_ptr()->get(); } - - constexpr const T* const_data_ptr() const { - return storage_.const_storage_ptr()->get(); - } - - public: - using size_type = size_t; - using difference_type = ptrdiff_t; - using value_type = T; - using pointer = T*; - using const_pointer = const T*; - using reference = T&; - using const_reference = const T&; - using iterator = T*; - using const_iterator = const T*; - using reverse_iterator = std::reverse_iterator; - using const_reverse_iterator = std::reverse_iterator; - - constexpr StaticVectorImpl() noexcept = default; - - // Move and copy constructors - StaticVectorImpl(StaticVectorImpl&& other) noexcept { - storage_.move_construct(std::move(other.storage_)); - } - - StaticVectorImpl& operator=(StaticVectorImpl&& other) noexcept { - if (ARROW_PREDICT_TRUE(&other != this)) { - // TODO move_assign? 
- storage_.destroy(); - storage_.move_construct(std::move(other.storage_)); - } - return *this; - } - - StaticVectorImpl(const StaticVectorImpl& other) { - init_by_copying(other.storage_.size_, other.const_data_ptr()); - } - - StaticVectorImpl& operator=(const StaticVectorImpl& other) noexcept { - if (ARROW_PREDICT_TRUE(&other != this)) { - assign_by_copying(other.storage_.size_, other.data()); - } - return *this; - } - - // Automatic conversion from std::vector, for convenience - StaticVectorImpl(const std::vector& other) { // NOLINT: explicit - init_by_copying(other.size(), other.data()); - } - - StaticVectorImpl(std::vector&& other) noexcept { // NOLINT: explicit - init_by_moving(other.size(), other.data()); - } - - StaticVectorImpl& operator=(const std::vector& other) { - assign_by_copying(other.size(), other.data()); - return *this; - } - - StaticVectorImpl& operator=(std::vector&& other) noexcept { - assign_by_moving(other.size(), other.data()); - return *this; - } - - // Constructing from count and optional initialization value - explicit StaticVectorImpl(size_t count) { - storage_.bump_size(count); - auto* p = storage_.storage_ptr(); - for (size_t i = 0; i < count; ++i) { - p[i].construct(); - } - } - - StaticVectorImpl(size_t count, const T& value) { - storage_.bump_size(count); - auto* p = storage_.storage_ptr(); - for (size_t i = 0; i < count; ++i) { - p[i].construct(value); - } - } - - StaticVectorImpl(std::initializer_list values) { - storage_.bump_size(values.size()); - auto* p = storage_.storage_ptr(); - for (auto&& v : values) { - // Unfortunately, cannot move initializer values - p++->construct(v); - } - } - - // Size inspection - - constexpr bool empty() const { return storage_.size_ == 0; } - - constexpr size_t size() const { return storage_.size_; } - - constexpr size_t capacity() const { return storage_.capacity(); } - - constexpr size_t max_size() const { return storage_.max_size(); } - - // Data access - - T& operator[](size_t i) { return data_ptr()[i]; } - - constexpr const T& operator[](size_t i) const { return const_data_ptr()[i]; } - - T& front() { return data_ptr()[0]; } - - constexpr const T& front() const { return const_data_ptr()[0]; } - - T& back() { return data_ptr()[storage_.size_ - 1]; } - - constexpr const T& back() const { return const_data_ptr()[storage_.size_ - 1]; } - - T* data() { return data_ptr(); } - - constexpr const T* data() const { return const_data_ptr(); } - - // Iterators - - iterator begin() { return iterator(data_ptr()); } - - constexpr const_iterator begin() const { return const_iterator(const_data_ptr()); } - - constexpr const_iterator cbegin() const { return const_iterator(const_data_ptr()); } - - iterator end() { return iterator(data_ptr() + storage_.size_); } - - constexpr const_iterator end() const { - return const_iterator(const_data_ptr() + storage_.size_); - } - - constexpr const_iterator cend() const { - return const_iterator(const_data_ptr() + storage_.size_); - } - - reverse_iterator rbegin() { return reverse_iterator(end()); } - - constexpr const_reverse_iterator rbegin() const { - return const_reverse_iterator(end()); - } - - constexpr const_reverse_iterator crbegin() const { - return const_reverse_iterator(end()); - } - - reverse_iterator rend() { return reverse_iterator(begin()); } - - constexpr const_reverse_iterator rend() const { - return const_reverse_iterator(begin()); - } - - constexpr const_reverse_iterator crend() const { - return const_reverse_iterator(begin()); - } - - // Mutations - - void reserve(size_t n) { 
storage_.reserve(n); } - - void clear() { storage_.clear(); } - - void push_back(const T& value) { - storage_.bump_size(1); - storage_.storage_ptr()[storage_.size_ - 1].construct(value); - } - - void push_back(T&& value) { - storage_.bump_size(1); - storage_.storage_ptr()[storage_.size_ - 1].construct(std::move(value)); - } - - template - void emplace_back(Args&&... args) { - storage_.bump_size(1); - storage_.storage_ptr()[storage_.size_ - 1].construct(std::forward(args)...); - } - - template - iterator insert(const_iterator insert_at, InputIt first, InputIt last) { - const size_t n = storage_.size_; - const size_t it_size = static_cast(last - first); // XXX might be O(n)? - const size_t pos = static_cast(insert_at - const_data_ptr()); - storage_.bump_size(it_size); - auto* p = storage_.storage_ptr(); - if (it_size == 0) { - return p[pos].get(); - } - const size_t end_pos = pos + it_size; - - // Move [pos; n) to [end_pos; end_pos + n - pos) - size_t i = n; - size_t j = end_pos + n - pos; - while (j > std::max(n, end_pos)) { - p[--j].move_construct(&p[--i]); - } - while (j > end_pos) { - p[--j].move_assign(&p[--i]); - } - assert(j == end_pos); - // Copy [first; last) to [pos; end_pos) - j = pos; - while (j < std::min(n, end_pos)) { - p[j++].assign(*first++); - } - while (j < end_pos) { - p[j++].construct(*first++); - } - assert(first == last); - return p[pos].get(); - } - - void resize(size_t n) { - const size_t old_size = storage_.size_; - if (n > storage_.size_) { - storage_.bump_size(n - old_size); - auto* p = storage_.storage_ptr(); - for (size_t i = old_size; i < n; ++i) { - p[i].construct(T{}); - } - } else { - auto* p = storage_.storage_ptr(); - for (size_t i = n; i < old_size; ++i) { - p[i].destroy(); - } - storage_.reduce_size(old_size - n); - } - } - - void resize(size_t n, const T& value) { - const size_t old_size = storage_.size_; - if (n > storage_.size_) { - storage_.bump_size(n - old_size); - auto* p = storage_.storage_ptr(); - for (size_t i = old_size; i < n; ++i) { - p[i].construct(value); - } - } else { - auto* p = storage_.storage_ptr(); - for (size_t i = n; i < old_size; ++i) { - p[i].destroy(); - } - storage_.reduce_size(old_size - n); - } - } - - private: - template - void init_by_copying(size_t n, InputIt src) { - storage_.bump_size(n); - auto* dest = storage_.storage_ptr(); - for (size_t i = 0; i < n; ++i, ++src) { - dest[i].construct(*src); - } - } - - template - void init_by_moving(size_t n, InputIt src) { - init_by_copying(n, std::make_move_iterator(src)); - } - - template - void assign_by_copying(size_t n, InputIt src) { - const size_t old_size = storage_.size_; - if (n > old_size) { - storage_.bump_size(n - old_size); - auto* dest = storage_.storage_ptr(); - for (size_t i = 0; i < old_size; ++i, ++src) { - dest[i].assign(*src); - } - for (size_t i = old_size; i < n; ++i, ++src) { - dest[i].construct(*src); - } - } else { - auto* dest = storage_.storage_ptr(); - for (size_t i = 0; i < n; ++i, ++src) { - dest[i].assign(*src); - } - for (size_t i = n; i < old_size; ++i) { - dest[i].destroy(); - } - storage_.reduce_size(old_size - n); - } - } - - template - void assign_by_moving(size_t n, InputIt src) { - assign_by_copying(n, std::make_move_iterator(src)); - } -}; - -template -using StaticVector = StaticVectorImpl>; - -template -using SmallVector = StaticVectorImpl>; - -} // namespace internal -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/sort.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/sort.h 
deleted file mode 100644 index cdffe0b2..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/sort.h +++ /dev/null @@ -1,78 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include <algorithm> -#include <cstdint> -#include <functional> -#include <numeric> -#include <utility> -#include <vector> - -namespace arrow { -namespace internal { - -template <typename T, typename Cmp = std::less<T>> -std::vector<int64_t> ArgSort(const std::vector<T>& values, Cmp&& cmp = {}) { - std::vector<int64_t> indices(values.size()); - std::iota(indices.begin(), indices.end(), 0); - std::sort(indices.begin(), indices.end(), - [&](int64_t i, int64_t j) -> bool { return cmp(values[i], values[j]); }); - return indices; -} - -template <typename T> -size_t Permute(const std::vector<int64_t>& indices, std::vector<T>* values) { - if (indices.size() <= 1) { - return indices.size(); - } - - // mask indicating which of values are in the correct location - std::vector<bool> sorted(indices.size(), false); - - size_t cycle_count = 0; - - for (auto cycle_start = sorted.begin(); cycle_start != sorted.end(); - cycle_start = std::find(cycle_start, sorted.end(), false)) { - ++cycle_count; - - // position in which an element belongs WRT sort - auto sort_into = static_cast<int64_t>(cycle_start - sorted.begin()); - - if (indices[sort_into] == sort_into) { - // trivial cycle - sorted[sort_into] = true; - continue; - } - - // resolve this cycle - const auto end = sort_into; - for (int64_t take_from = indices[sort_into]; take_from != end; - take_from = indices[sort_into]) { - std::swap(values->at(sort_into), values->at(take_from)); - sorted[sort_into] = true; - sort_into = take_from; - } - sorted[sort_into] = true; - } - - return cycle_count; -} - -} // namespace internal -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/spaced.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/spaced.h deleted file mode 100644 index 8265e1d2..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/spaced.h +++ /dev/null @@ -1,98 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License.
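A short sketch of the ArgSort/Permute pair from sort.h above, reordering a payload vector by a key vector; the data is illustrative:

#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

#include "arrow/util/sort.h"

inline void SortPayloadByKey() {
  std::vector<int> keys = {3, 1, 2};
  std::vector<std::string> payload = {"c", "a", "b"};
  // ArgSort yields the index order that sorts `keys`: {1, 2, 0}.
  std::vector<int64_t> order = arrow::internal::ArgSort(keys);
  // Permute applies that order to the payload in place, cycle by cycle.
  arrow::internal::Permute(order, &payload);
  assert((payload == std::vector<std::string>{"a", "b", "c"}));
}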
- -#pragma once - -#include -#include -#include - -#include "arrow/util/bit_run_reader.h" - -namespace arrow { -namespace util { -namespace internal { - -/// \brief Compress the buffer to spaced, excluding the null entries. -/// -/// \param[in] src the source buffer -/// \param[in] num_values the size of source buffer -/// \param[in] valid_bits bitmap data indicating position of valid slots -/// \param[in] valid_bits_offset offset into valid_bits -/// \param[out] output the output buffer spaced -/// \return The size of spaced buffer. -template -inline int SpacedCompress(const T* src, int num_values, const uint8_t* valid_bits, - int64_t valid_bits_offset, T* output) { - int num_valid_values = 0; - - arrow::internal::SetBitRunReader reader(valid_bits, valid_bits_offset, num_values); - while (true) { - const auto run = reader.NextRun(); - if (run.length == 0) { - break; - } - std::memcpy(output + num_valid_values, src + run.position, run.length * sizeof(T)); - num_valid_values += static_cast(run.length); - } - - return num_valid_values; -} - -/// \brief Relocate values in buffer into positions of non-null values as indicated by -/// a validity bitmap. -/// -/// \param[in, out] buffer the in-place buffer -/// \param[in] num_values total size of buffer including null slots -/// \param[in] null_count number of null slots -/// \param[in] valid_bits bitmap data indicating position of valid slots -/// \param[in] valid_bits_offset offset into valid_bits -/// \return The number of values expanded, including nulls. -template -inline int SpacedExpand(T* buffer, int num_values, int null_count, - const uint8_t* valid_bits, int64_t valid_bits_offset) { - // Point to end as we add the spacing from the back. - int idx_decode = num_values - null_count; - - // Depending on the number of nulls, some of the value slots in buffer may - // be uninitialized, and this will cause valgrind warnings / potentially UB - std::memset(static_cast(buffer + idx_decode), 0, null_count * sizeof(T)); - if (idx_decode == 0) { - // All nulls, nothing more to do - return num_values; - } - - arrow::internal::ReverseSetBitRunReader reader(valid_bits, valid_bits_offset, - num_values); - while (true) { - const auto run = reader.NextRun(); - if (run.length == 0) { - break; - } - idx_decode -= static_cast(run.length); - assert(idx_decode >= 0); - std::memmove(buffer + run.position, buffer + idx_decode, run.length * sizeof(T)); - } - - // Otherwise caller gave an incorrect null_count - assert(idx_decode == 0); - return num_values; -} - -} // namespace internal -} // namespace util -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/span.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/span.h deleted file mode 100644 index 4254fec7..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/span.h +++ /dev/null @@ -1,132 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
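A worked sketch of the SpacedCompress/SpacedExpand pair from spaced.h above; the four-slot buffer and one-byte bitmap are illustrative:

#include <cstdint>

#include "arrow/util/spaced.h"

inline void SpacedRoundTrip() {
  const uint8_t valid_bits = 0b00000101;  // slots 0 and 2 valid; 1 and 3 null
  const int32_t spaced[4] = {7, -1, 9, -1};
  int32_t packed[4];
  int n = arrow::util::internal::SpacedCompress(spaced, 4, &valid_bits,
                                                /*valid_bits_offset=*/0, packed);
  // n == 2; packed now begins with {7, 9}.
  int m = arrow::util::internal::SpacedExpand(packed, 4, /*null_count=*/2,
                                              &valid_bits,
                                              /*valid_bits_offset=*/0);
  // m == 4; the valid slots hold 7 and 9 again, while the null slots hold
  // unspecified (but initialized) values.
}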
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include -#include - -namespace arrow::util { - -/// std::span polyfill. -/// -/// Does not support static extents. -template -class span { - static_assert(sizeof(T), - R"( -std::span allows contiguous_iterators instead of just pointers, the enforcement -of which requires T to be a complete type. arrow::util::span does not support -contiguous_iterators, but T is still required to be a complete type to prevent -writing code which would break when it is replaced by std::span.)"); - - public: - using element_type = T; - using value_type = std::remove_cv_t; - using iterator = T*; - using const_iterator = T const*; - - span() = default; - span(const span&) = default; - span& operator=(const span&) = default; - - template >> - // NOLINTNEXTLINE runtime/explicit - constexpr span(span mut) : span{mut.data(), mut.size()} {} - - constexpr span(T* data, size_t count) : data_{data}, size_{count} {} - - constexpr span(T* begin, T* end) - : data_{begin}, size_{static_cast(end - begin)} {} - - template < - typename R, - typename DisableUnlessConstructibleFromDataAndSize = - decltype(span(std::data(std::declval()), std::size(std::declval()))), - typename DisableUnlessSimilarTypes = std::enable_if_t()))>>, - std::decay_t>>> - // NOLINTNEXTLINE runtime/explicit, non-const reference - constexpr span(R&& range) : span{std::data(range), std::size(range)} {} - - constexpr T* begin() const { return data_; } - constexpr T* end() const { return data_ + size_; } - constexpr T* data() const { return data_; } - - constexpr size_t size() const { return size_; } - constexpr size_t size_bytes() const { return size_ * sizeof(T); } - constexpr bool empty() const { return size_ == 0; } - - constexpr T& operator[](size_t i) { return data_[i]; } - constexpr const T& operator[](size_t i) const { return data_[i]; } - - constexpr span subspan(size_t offset) const { - if (offset > size_) return {data_, data_}; - return {data_ + offset, size_ - offset}; - } - - constexpr span subspan(size_t offset, size_t count) const { - auto out = subspan(offset); - if (count < out.size_) { - out.size_ = count; - } - return out; - } - - constexpr bool operator==(span const& other) const { - if (size_ != other.size_) return false; - - if constexpr (std::is_integral_v) { - if (size_ == 0) { - return true; // memcmp does not handle null pointers, even if size_ == 0 - } - return std::memcmp(data_, other.data_, size_bytes()) == 0; - } else { - T* ptr = data_; - for (T const& e : other) { - if (*ptr++ != e) return false; - } - return true; - } - } - constexpr bool operator!=(span const& other) const { return !(*this == other); } - - private: - T* data_{}; - size_t size_{}; -}; - -template -span(R& range) -> span>; - -template -span(T*, size_t) -> span; - -template -constexpr span as_bytes(span s) { - return {reinterpret_cast(s.data()), s.size_bytes()}; -} - -template -constexpr span as_writable_bytes(span s) { - return {reinterpret_cast(s.data()), s.size_bytes()}; -} - -} // namespace arrow::util diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/stopwatch.h 
b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/stopwatch.h deleted file mode 100644 index db4e67f5..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/stopwatch.h +++ /dev/null @@ -1,48 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -namespace arrow { -namespace internal { - -class StopWatch { - // This clock should give us wall clock time - using ClockType = std::chrono::steady_clock; - - public: - StopWatch() {} - - void Start() { start_ = ClockType::now(); } - - // Returns time in nanoseconds. - uint64_t Stop() { - auto stop = ClockType::now(); - std::chrono::nanoseconds d = stop - start_; - assert(d.count() >= 0); - return static_cast(d.count()); - } - - private: - std::chrono::time_point start_; -}; - -} // namespace internal -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/string.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/string.h deleted file mode 100644 index d7e37777..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/string.h +++ /dev/null @@ -1,173 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
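// [editor's note] Usage sketch for the deleted StopWatch above: wall-clock
// timing via std::chrono::steady_clock, reported in nanoseconds as the class
// does. The timed workload is a placeholder.
#include <chrono>
#include <cstdint>
#include <cstdio>

int main() {
  auto start = std::chrono::steady_clock::now();     // StopWatch::Start()
  volatile std::uint64_t acc = 0;
  for (int i = 0; i < 1000000; ++i) acc += i;        // placeholder workload
  auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
                std::chrono::steady_clock::now() - start).count();  // Stop()
  std::printf("elapsed: %lld ns\n", static_cast<long long>(ns));
}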
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#if __has_include() -#include -#endif - -#include "arrow/result.h" -#include "arrow/util/visibility.h" - -namespace arrow { - -class Status; - -ARROW_EXPORT std::string HexEncode(const uint8_t* data, size_t length); - -ARROW_EXPORT std::string Escape(const char* data, size_t length); - -ARROW_EXPORT std::string HexEncode(const char* data, size_t length); - -ARROW_EXPORT std::string HexEncode(std::string_view str); - -ARROW_EXPORT std::string Escape(std::string_view str); - -ARROW_EXPORT Status ParseHexValue(const char* hex_pair, uint8_t* out); - -ARROW_EXPORT Status ParseHexValues(std::string_view hex_string, uint8_t* out); - -namespace internal { - -/// Like std::string_view::starts_with in C++20 -inline bool StartsWith(std::string_view s, std::string_view prefix) { - return s.length() >= prefix.length() && - (s.empty() || s.substr(0, prefix.length()) == prefix); -} - -/// Like std::string_view::ends_with in C++20 -inline bool EndsWith(std::string_view s, std::string_view suffix) { - return s.length() >= suffix.length() && - (s.empty() || s.substr(s.length() - suffix.length()) == suffix); -} - -/// \brief Split a string with a delimiter -ARROW_EXPORT -std::vector SplitString(std::string_view v, char delim, - int64_t limit = 0); - -/// \brief Join strings with a delimiter -ARROW_EXPORT -std::string JoinStrings(const std::vector& strings, - std::string_view delimiter); - -/// \brief Join strings with a delimiter -ARROW_EXPORT -std::string JoinStrings(const std::vector& strings, - std::string_view delimiter); - -/// \brief Trim whitespace from left and right sides of string -ARROW_EXPORT -std::string TrimString(std::string value); - -ARROW_EXPORT -bool AsciiEqualsCaseInsensitive(std::string_view left, std::string_view right); - -ARROW_EXPORT -std::string AsciiToLower(std::string_view value); - -ARROW_EXPORT -std::string AsciiToUpper(std::string_view value); - -/// \brief Search for the first instance of a token and replace it or return nullopt if -/// the token is not found. -ARROW_EXPORT -std::optional Replace(std::string_view s, std::string_view token, - std::string_view replacement); - -/// \brief Get boolean value from string -/// -/// If "1", "true" (case-insensitive), returns true -/// If "0", "false" (case-insensitive), returns false -/// Otherwise, returns Status::Invalid -ARROW_EXPORT -arrow::Result ParseBoolean(std::string_view value); - -#if __has_include() - -namespace detail { -template -struct can_to_chars : public std::false_type {}; - -template -struct can_to_chars< - T, std::void_t(), std::declval(), - std::declval>()))>> - : public std::true_type {}; -} // namespace detail - -/// \brief Whether std::to_chars exists for the current value type. -/// -/// This is useful as some C++ libraries do not implement all specified overloads -/// for std::to_chars. -template -inline constexpr bool have_to_chars = detail::can_to_chars::value; - -/// \brief An ergonomic wrapper around std::to_chars, returning a std::string -/// -/// For most inputs, the std::string result will not incur any heap allocation -/// thanks to small string optimization. -/// -/// Compared to std::to_string, this function gives locale-agnostic results -/// and might also be faster. -template -std::string ToChars(T value, Args&&... args) { - if constexpr (!have_to_chars) { - // Some C++ standard libraries do not yet implement std::to_chars for all types, - // in which case we have to fallback to std::string. 
- return std::to_string(value); - } else { - // According to various sources, the GNU libstdc++ and Microsoft's C++ STL - // allow up to 15 bytes of small string optimization, while clang's libc++ - // goes up to 22 bytes. Choose the pessimistic value. - std::string out(15, 0); - auto res = std::to_chars(&out.front(), &out.back(), value, args...); - while (res.ec != std::errc{}) { - assert(res.ec == std::errc::value_too_large); - out.resize(out.capacity() * 2); - res = std::to_chars(&out.front(), &out.back(), value, args...); - } - const auto length = res.ptr - out.data(); - assert(length <= static_cast(out.length())); - out.resize(length); - return out; - } -} - -#else // !__has_include() - -template -inline constexpr bool have_to_chars = false; - -template -std::string ToChars(T value, Args&&... args) { - return std::to_string(value); -} - -#endif - -} // namespace internal -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/string_builder.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/string_builder.h deleted file mode 100644 index 7c05ccd5..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/string_builder.h +++ /dev/null @@ -1,84 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. template - -#pragma once - -#include -#include -#include -#include - -#include "arrow/util/visibility.h" - -namespace arrow { -namespace util { - -namespace detail { - -class ARROW_EXPORT StringStreamWrapper { - public: - StringStreamWrapper(); - ~StringStreamWrapper(); - - std::ostream& stream() { return ostream_; } - std::string str(); - - protected: - std::unique_ptr sstream_; - std::ostream& ostream_; -}; - -} // namespace detail - -template -void StringBuilderRecursive(std::ostream& stream, Head&& head) { - stream << head; -} - -template -void StringBuilderRecursive(std::ostream& stream, Head&& head, Tail&&... tail) { - StringBuilderRecursive(stream, std::forward(head)); - StringBuilderRecursive(stream, std::forward(tail)...); -} - -template -std::string StringBuilder(Args&&... args) { - detail::StringStreamWrapper ss; - StringBuilderRecursive(ss.stream(), std::forward(args)...); - return ss.str(); -} - -/// CRTP helper for declaring string representation. 
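// [editor's note] Simplified sketch of the ToChars helper above: a
// locale-independent alternative to std::to_string built on std::to_chars.
// Unlike the vendored version this uses one fixed stack buffer instead of the
// grow-and-retry loop and omits the have_to_chars feature detection, so it
// assumes the needed std::to_chars overload exists.
#include <charconv>
#include <cstdio>
#include <string>

template <typename T>
std::string ToCharsSimple(T value) {
  char buf[64];                                   // ample for integral types
  auto [ptr, ec] = std::to_chars(buf, buf + sizeof(buf), value);
  return (ec == std::errc{}) ? std::string(buf, ptr) : std::string();
}

int main() { std::printf("%s\n", ToCharsSimple(42).c_str()); }  // prints "42"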
Defines operator<< -template -class ToStringOstreamable { - public: - ~ToStringOstreamable() { - static_assert( - std::is_same().ToString()), std::string>::value, - "ToStringOstreamable depends on the method T::ToString() const"); - } - - private: - const T& cast() const { return static_cast(*this); } - - friend inline std::ostream& operator<<(std::ostream& os, const ToStringOstreamable& t) { - return os << t.cast().ToString(); - } -}; - -} // namespace util -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/task_group.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/task_group.h deleted file mode 100644 index 3bb72f0d..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/task_group.h +++ /dev/null @@ -1,106 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include "arrow/status.h" -#include "arrow/type_fwd.h" -#include "arrow/util/cancel.h" -#include "arrow/util/functional.h" -#include "arrow/util/macros.h" -#include "arrow/util/type_fwd.h" -#include "arrow/util/visibility.h" - -namespace arrow { -namespace internal { - -/// \brief A group of related tasks -/// -/// A TaskGroup executes tasks with the signature `Status()`. -/// Execution can be serial or parallel, depending on the TaskGroup -/// implementation. When Finish() returns, it is guaranteed that all -/// tasks have finished, or at least one has errored. -/// -/// Once an error has occurred any tasks that are submitted to the task group -/// will not run. The call to Append will simply return without scheduling the -/// task. -/// -/// If the task group is parallel it is possible that multiple tasks could be -/// running at the same time and one of those tasks fails. This will put the -/// task group in a failure state (so additional tasks cannot be run) however -/// it will not interrupt running tasks. Finish will not complete -/// until all running tasks have finished, even if one task fails. -/// -/// Once a task group has finished new tasks may not be added to it. If you need to start -/// a new batch of work then you should create a new task group. -class ARROW_EXPORT TaskGroup : public std::enable_shared_from_this { - public: - /// Add a Status-returning function to execute. Execution order is - /// undefined. The function may be executed immediately or later. - template - void Append(Function&& func) { - return AppendReal(std::forward(func)); - } - - /// Wait for execution of all tasks (and subgroups) to be finished, - /// or for at least one task (or subgroup) to error out. - /// The returned Status propagates the error status of the first failing - /// task (or subgroup). 
- virtual Status Finish() = 0; - - /// Returns a future that will complete the first time all tasks are finished. - /// This should be called only after all top level tasks - /// have been added to the task group. - /// - /// If you are using a TaskGroup asynchronously there are a few considerations to keep - /// in mind. The tasks should not block on I/O, etc (defeats the purpose of using - /// futures) and should not be doing any nested locking or you run the risk of the tasks - /// getting stuck in the thread pool waiting for tasks which cannot get scheduled. - /// - /// Primarily this call is intended to help migrate existing work written with TaskGroup - /// in mind to using futures without having to do a complete conversion on the first - /// pass. - virtual Future<> FinishAsync() = 0; - - /// The current aggregate error Status. Non-blocking, useful for stopping early. - virtual Status current_status() = 0; - - /// Whether some tasks have already failed. Non-blocking, useful for stopping early. - virtual bool ok() const = 0; - - /// How many tasks can typically be executed in parallel. - /// This is only a hint, useful for testing or debugging. - virtual int parallelism() = 0; - - static std::shared_ptr MakeSerial(StopToken = StopToken::Unstoppable()); - static std::shared_ptr MakeThreaded(internal::Executor*, - StopToken = StopToken::Unstoppable()); - - virtual ~TaskGroup() = default; - - protected: - TaskGroup() = default; - ARROW_DISALLOW_COPY_AND_ASSIGN(TaskGroup); - - virtual void AppendReal(FnOnce task) = 0; -}; - -} // namespace internal -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/tdigest.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/tdigest.h deleted file mode 100644 index 308df468..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/tdigest.h +++ /dev/null @@ -1,104 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
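// [editor's note] Sketch of the task-group pattern documented above: append
// Status-returning tasks, wait for all of them in Finish(), and propagate the
// first failure. std::async and an error string stand in for Arrow's
// executors and Status; MiniTaskGroup is an assumed name.
#include <functional>
#include <future>
#include <string>
#include <vector>

struct MiniTaskGroup {
  std::vector<std::future<std::string>> futs;    // "" means OK, else error text
  void Append(std::function<std::string()> task) {
    futs.push_back(std::async(std::launch::async, std::move(task)));
  }
  std::string Finish() {                         // waits for all, returns first error
    std::string first;
    for (auto& f : futs) {
      std::string st = f.get();
      if (first.empty()) first = st;
    }
    return first;
  }
};

int main() {
  MiniTaskGroup g;
  g.Append([] { return std::string(); });
  g.Append([] { return std::string("boom"); });
  return g.Finish().empty() ? 0 : 1;             // returns 1: failure propagated
}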
- -// approximate quantiles from arbitrary length dataset with O(1) space -// based on 'Computing Extremely Accurate Quantiles Using t-Digests' from Dunning & Ertl -// - https://arxiv.org/abs/1902.04023 -// - https://github.com/tdunning/t-digest - -#pragma once - -#include -#include -#include - -#include "arrow/util/logging.h" -#include "arrow/util/macros.h" -#include "arrow/util/visibility.h" - -namespace arrow { - -class Status; - -namespace internal { - -class ARROW_EXPORT TDigest { - public: - explicit TDigest(uint32_t delta = 100, uint32_t buffer_size = 500); - ~TDigest(); - TDigest(TDigest&&); - TDigest& operator=(TDigest&&); - - // reset and re-use this tdigest - void Reset(); - - // validate data integrity - Status Validate() const; - - // dump internal data, only for debug - void Dump() const; - - // buffer a single data point, consume internal buffer if full - // this function is intensively called and performance critical - // call it only if you are sure no NAN exists in input data - void Add(double value) { - DCHECK(!std::isnan(value)) << "cannot add NAN"; - if (ARROW_PREDICT_FALSE(input_.size() == input_.capacity())) { - MergeInput(); - } - input_.push_back(value); - } - - // skip NAN on adding - template - typename std::enable_if::value>::type NanAdd(T value) { - if (!std::isnan(value)) Add(value); - } - - template - typename std::enable_if::value>::type NanAdd(T value) { - Add(static_cast(value)); - } - - // merge with other t-digests, called infrequently - void Merge(const std::vector& others); - void Merge(const TDigest& other); - - // calculate quantile - double Quantile(double q) const; - - double Min() const { return Quantile(0); } - double Max() const { return Quantile(1); } - double Mean() const; - - // check if this tdigest contains no valid data points - bool is_empty() const; - - private: - // merge input data with current tdigest - void MergeInput() const; - - // input buffer, size = buffer_size * sizeof(double) - mutable std::vector input_; - - // hide other members with pimpl - class TDigestImpl; - std::unique_ptr impl_; -}; - -} // namespace internal -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/test_common.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/test_common.h deleted file mode 100644 index 511daed1..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/test_common.h +++ /dev/null @@ -1,90 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
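// [editor's note] Sketch of the NanAdd dispatch used by the deleted TDigest:
// floating-point inputs are screened for NaN, integral inputs (which cannot
// be NaN) are cast and added directly. The Sink below is a stand-in for the
// real digest, which buffers values and merges when the buffer fills.
#include <cmath>
#include <cstdio>
#include <type_traits>
#include <vector>

struct Sink {
  std::vector<double> values;
  void Add(double v) { values.push_back(v); }

  template <typename T>
  std::enable_if_t<std::is_floating_point_v<T>> NanAdd(T v) {
    if (!std::isnan(v)) Add(v);                  // silently skip NaN
  }
  template <typename T>
  std::enable_if_t<std::is_integral_v<T>> NanAdd(T v) {
    Add(static_cast<double>(v));                 // integers can't be NaN
  }
};

int main() {
  Sink s;
  s.NanAdd(1);                 // integral overload
  s.NanAdd(std::nan(""));      // skipped
  s.NanAdd(2.5);
  std::printf("%zu\n", s.values.size());         // prints "2"
}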
- -#pragma once - -#include - -#include "arrow/testing/gtest_util.h" -#include "arrow/util/iterator.h" - -namespace arrow { - -struct TestInt { - TestInt(); - TestInt(int i); // NOLINT runtime/explicit - int value; - - bool operator==(const TestInt& other) const; - - friend std::ostream& operator<<(std::ostream& os, const TestInt& v); -}; - -template <> -struct IterationTraits { - static TestInt End() { return TestInt(); } - static bool IsEnd(const TestInt& val) { return val == IterationTraits::End(); } -}; - -struct TestStr { - TestStr(); - TestStr(const std::string& s); // NOLINT runtime/explicit - TestStr(const char* s); // NOLINT runtime/explicit - explicit TestStr(const TestInt& test_int); - std::string value; - - bool operator==(const TestStr& other) const; - - friend std::ostream& operator<<(std::ostream& os, const TestStr& v); -}; - -template <> -struct IterationTraits { - static TestStr End() { return TestStr(); } - static bool IsEnd(const TestStr& val) { return val == IterationTraits::End(); } -}; - -std::vector RangeVector(unsigned int max, unsigned int step = 1); - -template -inline Iterator VectorIt(std::vector v) { - return MakeVectorIterator(std::move(v)); -} - -template -inline Iterator PossiblySlowVectorIt(std::vector v, bool slow = false) { - auto iterator = MakeVectorIterator(std::move(v)); - if (slow) { - return MakeTransformedIterator(std::move(iterator), - [](T item) -> Result> { - SleepABit(); - return TransformYield(item); - }); - } else { - return iterator; - } -} - -template -inline void AssertIteratorExhausted(Iterator& it) { - ASSERT_OK_AND_ASSIGN(T next, it.Next()); - ASSERT_TRUE(IsIterationEnd(next)); -} - -Transformer MakeFilter(std::function filter); - -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/thread_pool.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/thread_pool.h deleted file mode 100644 index 44b1e227..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/thread_pool.h +++ /dev/null @@ -1,620 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
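// [editor's note] Sketch of the sentinel-based iteration convention the
// deleted test helpers rely on: a type opts in by specializing traits with an
// End() sentinel and an IsEnd() test, so a generic loop can stop without
// exceptions or optional wrappers. Names here are assumed for illustration.
#include <cstddef>
#include <cstdio>
#include <vector>

template <typename T>
struct IterTraits;                               // specialize per type

template <>
struct IterTraits<int> {
  static int End() { return -1; }                // assumption: -1 is never a value
  static bool IsEnd(int v) { return v == End(); }
};

struct VecIter {
  std::vector<int> v;
  std::size_t pos = 0;
  int Next() { return pos < v.size() ? v[pos++] : IterTraits<int>::End(); }
};

int main() {
  VecIter it{{10, 20, 30}};
  for (int x = it.Next(); !IterTraits<int>::IsEnd(x); x = it.Next())
    std::printf("%d\n", x);
}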
- -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "arrow/result.h" -#include "arrow/status.h" -#include "arrow/util/cancel.h" -#include "arrow/util/config.h" -#include "arrow/util/functional.h" -#include "arrow/util/future.h" -#include "arrow/util/iterator.h" -#include "arrow/util/macros.h" -#include "arrow/util/visibility.h" - -#if defined(_MSC_VER) -// Disable harmless warning for decorated name length limit -#pragma warning(disable : 4503) -#endif - -namespace arrow { - -/// \brief Get the capacity of the global thread pool -/// -/// Return the number of worker threads in the thread pool to which -/// Arrow dispatches various CPU-bound tasks. This is an ideal number, -/// not necessarily the exact number of threads at a given point in time. -/// -/// You can change this number using SetCpuThreadPoolCapacity(). -ARROW_EXPORT int GetCpuThreadPoolCapacity(); - -/// \brief Set the capacity of the global thread pool -/// -/// Set the number of worker threads int the thread pool to which -/// Arrow dispatches various CPU-bound tasks. -/// -/// The current number is returned by GetCpuThreadPoolCapacity(). -ARROW_EXPORT Status SetCpuThreadPoolCapacity(int threads); - -namespace internal { - -// Hints about a task that may be used by an Executor. -// They are ignored by the provided ThreadPool implementation. -struct TaskHints { - // The lower, the more urgent - int32_t priority = 0; - // The IO transfer size in bytes - int64_t io_size = -1; - // The approximate CPU cost in number of instructions - int64_t cpu_cost = -1; - // An application-specific ID - int64_t external_id = -1; -}; - -class ARROW_EXPORT Executor { - public: - using StopCallback = internal::FnOnce; - - virtual ~Executor(); - - // Spawn a fire-and-forget task. - template - Status Spawn(Function&& func) { - return SpawnReal(TaskHints{}, std::forward(func), StopToken::Unstoppable(), - StopCallback{}); - } - template - Status Spawn(Function&& func, StopToken stop_token) { - return SpawnReal(TaskHints{}, std::forward(func), std::move(stop_token), - StopCallback{}); - } - template - Status Spawn(TaskHints hints, Function&& func) { - return SpawnReal(hints, std::forward(func), StopToken::Unstoppable(), - StopCallback{}); - } - template - Status Spawn(TaskHints hints, Function&& func, StopToken stop_token) { - return SpawnReal(hints, std::forward(func), std::move(stop_token), - StopCallback{}); - } - template - Status Spawn(TaskHints hints, Function&& func, StopToken stop_token, - StopCallback stop_callback) { - return SpawnReal(hints, std::forward(func), std::move(stop_token), - std::move(stop_callback)); - } - - // Transfers a future to this executor. Any continuations added to the - // returned future will run in this executor. Otherwise they would run - // on the same thread that called MarkFinished. - // - // This is necessary when (for example) an I/O task is completing a future. - // The continuations of that future should run on the CPU thread pool keeping - // CPU heavy work off the I/O thread pool. So the I/O task should transfer - // the future to the CPU executor before returning. - // - // By default this method will only transfer if the future is not already completed. If - // the future is already completed then any callback would be run synchronously and so - // no transfer is typically necessary. However, in cases where you want to force a - // transfer (e.g. 
to help the scheduler break up units of work across multiple cores) - // then you can override this behavior with `always_transfer`. - template - Future Transfer(Future future) { - return DoTransfer(std::move(future), false); - } - - // Overload of Transfer which will always schedule callbacks on new threads even if the - // future is finished when the callback is added. - // - // This can be useful in cases where you want to ensure parallelism - template - Future TransferAlways(Future future) { - return DoTransfer(std::move(future), true); - } - - // Submit a callable and arguments for execution. Return a future that - // will return the callable's result value once. - // The callable's arguments are copied before execution. - template > - Result Submit(TaskHints hints, StopToken stop_token, Function&& func, - Args&&... args) { - using ValueType = typename FutureType::ValueType; - - auto future = FutureType::Make(); - auto task = std::bind(::arrow::detail::ContinueFuture{}, future, - std::forward(func), std::forward(args)...); - struct { - WeakFuture weak_fut; - - void operator()(const Status& st) { - auto fut = weak_fut.get(); - if (fut.is_valid()) { - fut.MarkFinished(st); - } - } - } stop_callback{WeakFuture(future)}; - ARROW_RETURN_NOT_OK(SpawnReal(hints, std::move(task), std::move(stop_token), - std::move(stop_callback))); - - return future; - } - - template > - Result Submit(StopToken stop_token, Function&& func, Args&&... args) { - return Submit(TaskHints{}, stop_token, std::forward(func), - std::forward(args)...); - } - - template > - Result Submit(TaskHints hints, Function&& func, Args&&... args) { - return Submit(std::move(hints), StopToken::Unstoppable(), - std::forward(func), std::forward(args)...); - } - - template > - Result Submit(Function&& func, Args&&... args) { - return Submit(TaskHints{}, StopToken::Unstoppable(), std::forward(func), - std::forward(args)...); - } - - // Return the level of parallelism (the number of tasks that may be executed - // concurrently). This may be an approximate number. - virtual int GetCapacity() = 0; - - // Return true if the thread from which this function is called is owned by this - // Executor. Returns false if this Executor does not support this property. - virtual bool OwnsThisThread() { return false; } - - // Return true if this is the current executor being called - // n.b. this defaults to just calling OwnsThisThread - // unless the threadpool is disabled - virtual bool IsCurrentExecutor() { return OwnsThisThread(); } - - /// \brief An interface to represent something with a custom destructor - /// - /// \see KeepAlive - class ARROW_EXPORT Resource { - public: - virtual ~Resource() = default; - }; - - /// \brief Keep a resource alive until all executor threads have terminated - /// - /// Executors may have static storage duration. In particular, the CPU and I/O - /// executors are currently implemented this way. These threads may access other - /// objects with static storage duration such as the OpenTelemetry runtime context - /// the default memory pool, or other static executors. - /// - /// The order in which these objects are destroyed is difficult to control. In order - /// to ensure those objects remain alive until all threads have finished those objects - /// should be wrapped in a Resource object and passed into this method. The given - /// shared_ptr will be kept alive until all threads have finished their worker loops. 
- virtual void KeepAlive(std::shared_ptr resource); - - protected: - ARROW_DISALLOW_COPY_AND_ASSIGN(Executor); - - Executor() = default; - - template , typename FTSync = typename FT::SyncType> - Future DoTransfer(Future future, bool always_transfer = false) { - auto transferred = Future::Make(); - if (always_transfer) { - CallbackOptions callback_options = CallbackOptions::Defaults(); - callback_options.should_schedule = ShouldSchedule::Always; - callback_options.executor = this; - auto sync_callback = [transferred](const FTSync& result) mutable { - transferred.MarkFinished(result); - }; - future.AddCallback(sync_callback, callback_options); - return transferred; - } - - // We could use AddCallback's ShouldSchedule::IfUnfinished but we can save a bit of - // work by doing the test here. - auto callback = [this, transferred](const FTSync& result) mutable { - auto spawn_status = - Spawn([transferred, result]() mutable { transferred.MarkFinished(result); }); - if (!spawn_status.ok()) { - transferred.MarkFinished(spawn_status); - } - }; - auto callback_factory = [&callback]() { return callback; }; - if (future.TryAddCallback(callback_factory)) { - return transferred; - } - // If the future is already finished and we aren't going to force spawn a thread - // then we don't need to add another layer of callback and can return the original - // future - return future; - } - - // Subclassing API - virtual Status SpawnReal(TaskHints hints, FnOnce task, StopToken, - StopCallback&&) = 0; -}; - -/// \brief An executor implementation that runs all tasks on a single thread using an -/// event loop. -/// -/// Note: Any sort of nested parallelism will deadlock this executor. Blocking waits are -/// fine but if one task needs to wait for another task it must be expressed as an -/// asynchronous continuation. -class ARROW_EXPORT SerialExecutor : public Executor { - public: - template - using TopLevelTask = internal::FnOnce(Executor*)>; - - ~SerialExecutor() override; - - int GetCapacity() override { return 1; }; - bool OwnsThisThread() override; - Status SpawnReal(TaskHints hints, FnOnce task, StopToken, - StopCallback&&) override; - - // Return the number of tasks either running or in the queue. - int GetNumTasks(); - - /// \brief Runs the TopLevelTask and any scheduled tasks - /// - /// The TopLevelTask (or one of the tasks it schedules) must either return an invalid - /// status or call the finish signal. Failure to do this will result in a deadlock. For - /// this reason it is preferable (if possible) to use the helper methods (below) - /// RunSynchronously/RunSerially which delegates the responsibility onto a Future - /// producer's existing responsibility to always mark a future finished (which can - /// someday be aided by ARROW-12207). - template , - typename FTSync = typename FT::SyncType> - static FTSync RunInSerialExecutor(TopLevelTask initial_task) { - Future fut = SerialExecutor().Run(std::move(initial_task)); - return FutureToSync(fut); - } - - /// \brief Transform an AsyncGenerator into an Iterator - /// - /// An event loop will be created and each call to Next will power the event loop with - /// the calling thread until the next item is ready to be delivered. - /// - /// Note: The iterator's destructor will run until the given generator is fully - /// exhausted. If you wish to abandon iteration before completion then the correct - /// approach is to use a stop token to cause the generator to exhaust early. 
- template - static Iterator IterateGenerator( - internal::FnOnce()>>(Executor*)> initial_task) { - auto serial_executor = std::unique_ptr(new SerialExecutor()); - auto maybe_generator = std::move(initial_task)(serial_executor.get()); - if (!maybe_generator.ok()) { - return MakeErrorIterator(maybe_generator.status()); - } - auto generator = maybe_generator.MoveValueUnsafe(); - struct SerialIterator { - SerialIterator(std::unique_ptr executor, - std::function()> generator) - : executor(std::move(executor)), generator(std::move(generator)) {} - ARROW_DISALLOW_COPY_AND_ASSIGN(SerialIterator); - ARROW_DEFAULT_MOVE_AND_ASSIGN(SerialIterator); - ~SerialIterator() { - // A serial iterator must be consumed before it can be destroyed. Allowing it to - // do otherwise would lead to resource leakage. There will likely be deadlocks at - // this spot in the future but these will be the result of other bugs and not the - // fact that we are forcing consumption here. - - // If a streaming API needs to support early abandonment then it should be done so - // with a cancellation token and not simply discarding the iterator and expecting - // the underlying work to clean up correctly. - if (executor && !executor->IsFinished()) { - while (true) { - Result maybe_next = Next(); - if (!maybe_next.ok() || IsIterationEnd(*maybe_next)) { - break; - } - } - } - } - - Result Next() { - executor->Unpause(); - // This call may lead to tasks being scheduled in the serial executor - Future next_fut = generator(); - next_fut.AddCallback([this](const Result& res) { - // If we're done iterating we should drain the rest of the tasks in the executor - if (!res.ok() || IsIterationEnd(*res)) { - executor->Finish(); - return; - } - // Otherwise we will break out immediately, leaving the remaining tasks for - // the next call. - executor->Pause(); - }); -#ifdef ARROW_ENABLE_THREADING - // future must run on this thread - // Borrow this thread and run tasks until the future is finished - executor->RunLoop(); -#else - next_fut.Wait(); -#endif - if (!next_fut.is_finished()) { - // Not clear this is possible since RunLoop wouldn't generally exit - // unless we paused/finished which would imply next_fut has been - // finished. - return Status::Invalid( - "Serial executor terminated before next result computed"); - } - // At this point we may still have tasks in the executor, that is ok. - // We will run those tasks the next time through. - return next_fut.result(); - } - - std::unique_ptr executor; - std::function()> generator; - }; - return Iterator(SerialIterator{std::move(serial_executor), std::move(generator)}); - } - -#ifndef ARROW_ENABLE_THREADING - // run a pending task from loop - // returns true if any tasks were run in the last go round the loop (i.e. if it - // returns false, all executors are waiting) - static bool RunTasksOnAllExecutors(); - static SerialExecutor* GetCurrentExecutor(); - - bool IsCurrentExecutor() override; - -#endif - - protected: - virtual void RunLoop(); - - // State uses mutex - struct State; - std::shared_ptr state_; - - SerialExecutor(); - - // We mark the serial executor "finished" when there should be - // no more tasks scheduled on it. It's not strictly needed but - // can help catch bugs where we are trying to use the executor - // after we are done with it. - void Finish(); - bool IsFinished(); - // We pause the executor when we are running an async generator - // and we have received an item that we can deliver. 
- void Pause(); - void Unpause(); - - template ::SyncType> - Future Run(TopLevelTask initial_task) { - auto final_fut = std::move(initial_task)(this); - final_fut.AddCallback([this](const FTSync&) { Finish(); }); - RunLoop(); - return final_fut; - } - -#ifndef ARROW_ENABLE_THREADING - // we have to run tasks from all live executors - // during RunLoop if we don't have threading - static std::unordered_set all_executors; - // a pointer to the last one called by the loop - // so all tasks get spawned equally - // on multiple calls to RunTasksOnAllExecutors - static SerialExecutor* last_called_executor; - // without threading we can't tell which executor called the - // current process - so we set it in spawning the task - static SerialExecutor* current_executor; -#endif // ARROW_ENABLE_THREADING -}; - -#ifdef ARROW_ENABLE_THREADING - -/// An Executor implementation spawning tasks in FIFO manner on a fixed-size -/// pool of worker threads. -/// -/// Note: Any sort of nested parallelism will deadlock this executor. Blocking waits are -/// fine but if one task needs to wait for another task it must be expressed as an -/// asynchronous continuation. -class ARROW_EXPORT ThreadPool : public Executor { - public: - // Construct a thread pool with the given number of worker threads - static Result> Make(int threads); - - // Like Make(), but takes care that the returned ThreadPool is compatible - // with destruction late at process exit. - static Result> MakeEternal(int threads); - - // Destroy thread pool; the pool will first be shut down - ~ThreadPool() override; - - // Return the desired number of worker threads. - // The actual number of workers may lag a bit before being adjusted to - // match this value. - int GetCapacity() override; - - // Return the number of tasks either running or in the queue. - int GetNumTasks(); - - bool OwnsThisThread() override; - // Dynamically change the number of worker threads. - // - // This function always returns immediately. - // If fewer threads are running than this number, new threads are spawned - // on-demand when needed for task execution. - // If more threads are running than this number, excess threads are reaped - // as soon as possible. - Status SetCapacity(int threads); - - // Heuristic for the default capacity of a thread pool for CPU-bound tasks. - // This is exposed as a static method to help with testing. - static int DefaultCapacity(); - - // Shutdown the pool. Once the pool starts shutting down, new tasks - // cannot be submitted anymore. - // If "wait" is true, shutdown waits for all pending tasks to be finished. - // If "wait" is false, workers are stopped as soon as currently executing - // tasks are finished. 
- Status Shutdown(bool wait = true); - - // Wait for the thread pool to become idle - // - // This is useful for sequencing tests - void WaitForIdle(); - - void KeepAlive(std::shared_ptr resource) override; - - struct State; - - protected: - FRIEND_TEST(TestThreadPool, SetCapacity); - FRIEND_TEST(TestGlobalThreadPool, Capacity); - ARROW_FRIEND_EXPORT friend ThreadPool* GetCpuThreadPool(); - - ThreadPool(); - - Status SpawnReal(TaskHints hints, FnOnce task, StopToken, - StopCallback&&) override; - - // Collect finished worker threads, making sure the OS threads have exited - void CollectFinishedWorkersUnlocked(); - // Launch a given number of additional workers - void LaunchWorkersUnlocked(int threads); - // Get the current actual capacity - int GetActualCapacity(); - - static std::shared_ptr MakeCpuThreadPool(); - - std::shared_ptr sp_state_; - State* state_; - bool shutdown_on_destroy_; -}; -#else // ARROW_ENABLE_THREADING -// an executor implementation which pretends to be a thread pool but runs everything -// on the main thread using a static queue (shared between all thread pools, otherwise -// cross-threadpool dependencies will break everything) -class ARROW_EXPORT ThreadPool : public SerialExecutor { - public: - ARROW_FRIEND_EXPORT friend ThreadPool* GetCpuThreadPool(); - - static Result> Make(int threads); - - // Like Make(), but takes care that the returned ThreadPool is compatible - // with destruction late at process exit. - static Result> MakeEternal(int threads); - - // Destroy thread pool; the pool will first be shut down - ~ThreadPool() override; - - // Return the desired number of worker threads. - // The actual number of workers may lag a bit before being adjusted to - // match this value. - int GetCapacity() override; - - virtual int GetActualCapacity(); - - bool OwnsThisThread() override { return true; } - - // Dynamically change the number of worker threads. - // without threading this is equal to the - // number of tasks that can be running at once - // (inside each other) - Status SetCapacity(int threads); - - static int DefaultCapacity() { return 8; } - - // Shutdown the pool. Once the pool starts shutting down, new tasks - // cannot be submitted anymore. - // If "wait" is true, shutdown waits for all pending tasks to be finished. - // If "wait" is false, workers are stopped as soon as currently executing - // tasks are finished. - Status Shutdown(bool wait = true); - - // Wait for the thread pool to become idle - // - // This is useful for sequencing tests - void WaitForIdle(); - - protected: - static std::shared_ptr MakeCpuThreadPool(); - ThreadPool(); -}; - -#endif // ARROW_ENABLE_THREADING - -// Return the process-global thread pool for CPU-bound tasks. -ARROW_EXPORT ThreadPool* GetCpuThreadPool(); - -/// \brief Potentially run an async operation serially (if use_threads is false) -/// \see RunSerially -/// -/// If `use_threads` is true, the global CPU executor is used. -/// If `use_threads` is false, a temporary SerialExecutor is used. -/// `get_future` is called (from this thread) with the chosen executor and must -/// return a future that will eventually finish. This function returns once the -/// future has finished. 
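// [editor's note] Sketch of the use_threads toggle that RunSynchronously
// (declared just below) implements: the same callable either runs on a worker
// thread or inline on the calling thread, and the caller blocks for the
// result either way. std::async stands in for Arrow's executors here.
#include <cstdio>
#include <future>

template <typename Fn>
auto RunMaybeThreaded(Fn&& fn, bool use_threads) -> decltype(fn()) {
  if (use_threads)
    return std::async(std::launch::async, std::forward<Fn>(fn)).get();
  return fn();                                   // serial: borrow this thread
}

int main() {
  int a = RunMaybeThreaded([] { return 40 + 2; }, true);
  int b = RunMaybeThreaded([] { return 40 + 2; }, false);
  std::printf("%d %d\n", a, b);                  // prints "42 42"
}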
-template -typename Fut::SyncType RunSynchronously(FnOnce get_future, - bool use_threads) { - if (use_threads) { - auto fut = std::move(get_future)(GetCpuThreadPool()); - return FutureToSync(fut); - } else { - return SerialExecutor::RunInSerialExecutor(std::move(get_future)); - } -} - -/// \brief Potentially iterate an async generator serially (if use_threads is false) -/// \see IterateGenerator -/// -/// If `use_threads` is true, the global CPU executor will be used. Each call to -/// the iterator will simply wait until the next item is available. Tasks may run in -/// the background between calls. -/// -/// If `use_threads` is false, the calling thread only will be used. Each call to -/// the iterator will use the calling thread to do enough work to generate one item. -/// Tasks will be left in a queue until the next call and no work will be done between -/// calls. -template -Iterator IterateSynchronously( - FnOnce()>>(Executor*)> get_gen, bool use_threads) { - if (use_threads) { - auto maybe_gen = std::move(get_gen)(GetCpuThreadPool()); - if (!maybe_gen.ok()) { - return MakeErrorIterator(maybe_gen.status()); - } - return MakeGeneratorIterator(*maybe_gen); - } else { - return SerialExecutor::IterateGenerator(std::move(get_gen)); - } -} - -} // namespace internal -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/time.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/time.h deleted file mode 100644 index 981eab59..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/time.h +++ /dev/null @@ -1,83 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include - -#include "arrow/type_fwd.h" -#include "arrow/util/visibility.h" - -namespace arrow { -namespace util { - -enum DivideOrMultiply { - MULTIPLY, - DIVIDE, -}; - -ARROW_EXPORT -std::pair GetTimestampConversion(TimeUnit::type in_unit, - TimeUnit::type out_unit); - -// Converts a Timestamp value into another Timestamp value. -// -// This function takes care of properly transforming from one unit to another. -// -// \param[in] in the input type. Must be TimestampType. -// \param[in] out the output type. Must be TimestampType. -// \param[in] value the input value. -// -// \return The converted value, or an error. -ARROW_EXPORT Result ConvertTimestampValue(const std::shared_ptr& in, - const std::shared_ptr& out, - int64_t value); - -template -decltype(std::declval()(std::chrono::seconds{}, std::declval()...)) -VisitDuration(TimeUnit::type unit, Visitor&& visitor, Args&&... 
args) { - switch (unit) { - default: - case TimeUnit::SECOND: - break; - case TimeUnit::MILLI: - return visitor(std::chrono::milliseconds{}, std::forward(args)...); - case TimeUnit::MICRO: - return visitor(std::chrono::microseconds{}, std::forward(args)...); - case TimeUnit::NANO: - return visitor(std::chrono::nanoseconds{}, std::forward(args)...); - } - return visitor(std::chrono::seconds{}, std::forward(args)...); -} - -/// Convert a count of seconds to the corresponding count in a different TimeUnit -struct CastSecondsToUnitImpl { - template - int64_t operator()(Duration, int64_t seconds) { - auto duration = std::chrono::duration_cast(std::chrono::seconds{seconds}); - return static_cast(duration.count()); - } -}; - -inline int64_t CastSecondsToUnit(TimeUnit::type unit, int64_t seconds) { - return VisitDuration(unit, CastSecondsToUnitImpl{}, seconds); -} - -} // namespace util -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/tracing.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/tracing.h deleted file mode 100644 index d7808256..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/tracing.h +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include "arrow/util/visibility.h" - -namespace arrow { -namespace util { -namespace tracing { - -class ARROW_EXPORT SpanDetails { - public: - virtual ~SpanDetails() {} -}; - -class ARROW_EXPORT Span { - public: - Span() noexcept; - /// True if this span has been started with START_SPAN - bool valid() const; - /// End the span early - void reset(); - std::unique_ptr details; -}; - -} // namespace tracing -} // namespace util -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/trie.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/trie.h deleted file mode 100644 index 7815d4d1..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/trie.h +++ /dev/null @@ -1,243 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
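// [editor's note] Sketch of the seconds-to-unit conversion performed by the
// deleted time helpers above: dispatch on a unit enum to the matching
// std::chrono duration, as CastSecondsToUnit does via VisitDuration. The Unit
// enum is a simplified stand-in for arrow::TimeUnit.
#include <chrono>
#include <cstdint>
#include <cstdio>

enum class Unit { Second, Milli, Micro, Nano };

std::int64_t SecondsToUnit(Unit unit, std::int64_t secs) {
  using namespace std::chrono;
  switch (unit) {
    case Unit::Milli: return duration_cast<milliseconds>(seconds{secs}).count();
    case Unit::Micro: return duration_cast<microseconds>(seconds{secs}).count();
    case Unit::Nano:  return duration_cast<nanoseconds>(seconds{secs}).count();
    default:          return secs;
  }
}

int main() { std::printf("%lld\n", (long long)SecondsToUnit(Unit::Milli, 3)); }  // 3000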
See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "arrow/status.h" -#include "arrow/util/macros.h" -#include "arrow/util/visibility.h" - -namespace arrow { -namespace internal { - -// A non-zero-terminated small string class. -// std::string usually has a small string optimization -// (see review at https://shaharmike.com/cpp/std-string/) -// but this one allows tight control and optimization of memory layout. -template -class SmallString { - public: - SmallString() : length_(0) {} - - template - SmallString(const T& v) { // NOLINT implicit constructor - *this = std::string_view(v); - } - - SmallString& operator=(const std::string_view s) { -#ifndef NDEBUG - CheckSize(s.size()); -#endif - length_ = static_cast(s.size()); - std::memcpy(data_, s.data(), length_); - return *this; - } - - SmallString& operator=(const std::string& s) { - *this = std::string_view(s); - return *this; - } - - SmallString& operator=(const char* s) { - *this = std::string_view(s); - return *this; - } - - explicit operator std::string_view() const { return std::string_view(data_, length_); } - - const char* data() const { return data_; } - size_t length() const { return length_; } - bool empty() const { return length_ == 0; } - char operator[](size_t pos) const { -#ifdef NDEBUG - assert(pos <= length_); -#endif - return data_[pos]; - } - - SmallString substr(size_t pos) const { - return SmallString(std::string_view(*this).substr(pos)); - } - - SmallString substr(size_t pos, size_t count) const { - return SmallString(std::string_view(*this).substr(pos, count)); - } - - template - bool operator==(T&& other) const { - return std::string_view(*this) == std::string_view(std::forward(other)); - } - - template - bool operator!=(T&& other) const { - return std::string_view(*this) != std::string_view(std::forward(other)); - } - - protected: - uint8_t length_; - char data_[N]; - - void CheckSize(size_t n) { assert(n <= N); } -}; - -template -std::ostream& operator<<(std::ostream& os, const SmallString& str) { - return os << std::string_view(str); -} - -// A trie class for byte strings, optimized for small sets of short strings. -// This class is immutable by design, use a TrieBuilder to construct it. 
-class ARROW_EXPORT Trie { - using index_type = int16_t; - using fast_index_type = int_fast16_t; - static constexpr auto kMaxIndex = std::numeric_limits::max(); - - public: - Trie() : size_(0) {} - Trie(Trie&&) = default; - Trie& operator=(Trie&&) = default; - - int32_t Find(std::string_view s) const { - const Node* node = &nodes_[0]; - fast_index_type pos = 0; - if (s.length() > static_cast(kMaxIndex)) { - return -1; - } - fast_index_type remaining = static_cast(s.length()); - - while (remaining > 0) { - auto substring_length = node->substring_length(); - if (substring_length > 0) { - auto substring_data = node->substring_data(); - if (remaining < substring_length) { - // Input too short - return -1; - } - for (fast_index_type i = 0; i < substring_length; ++i) { - if (s[pos++] != substring_data[i]) { - // Mismatching substring - return -1; - } - --remaining; - } - if (remaining == 0) { - // Matched node exactly - return node->found_index_; - } - } - // Lookup child using next input character - if (node->child_lookup_ == -1) { - // Input too long - return -1; - } - auto c = static_cast(s[pos++]); - --remaining; - auto child_index = lookup_table_[node->child_lookup_ * 256 + c]; - if (child_index == -1) { - // Child not found - return -1; - } - node = &nodes_[child_index]; - } - - // Input exhausted - if (node->substring_.empty()) { - // Matched node exactly - return node->found_index_; - } else { - return -1; - } - } - - Status Validate() const; - - void Dump() const; - - protected: - static constexpr size_t kNodeSize = 16; - static constexpr auto kMaxSubstringLength = - kNodeSize - 2 * sizeof(index_type) - sizeof(int8_t); - - struct Node { - // If this node is a valid end of string, index of found string, otherwise -1 - index_type found_index_; - // Base index for child lookup in lookup_table_ (-1 if no child nodes) - index_type child_lookup_; - // The substring for this node. 
- SmallString substring_; - - fast_index_type substring_length() const { - return static_cast(substring_.length()); - } - const char* substring_data() const { return substring_.data(); } - }; - - static_assert(sizeof(Node) == kNodeSize, "Unexpected node size"); - - ARROW_DISALLOW_COPY_AND_ASSIGN(Trie); - - void Dump(const Node* node, const std::string& indent) const; - - // Node table: entry 0 is the root node - std::vector nodes_; - - // Indexed lookup structure: gives index in node table, or -1 if not found - std::vector lookup_table_; - - // Number of entries - index_type size_; - - friend class TrieBuilder; -}; - -class ARROW_EXPORT TrieBuilder { - using index_type = Trie::index_type; - using fast_index_type = Trie::fast_index_type; - - public: - TrieBuilder(); - Status Append(std::string_view s, bool allow_duplicate = false); - Trie Finish(); - - protected: - // Extend the lookup table by 256 entries, return the index of the new span - Status ExtendLookupTable(index_type* out_lookup_index); - // Split the node given by the index at the substring index `split_at` - Status SplitNode(fast_index_type node_index, fast_index_type split_at); - // Append an already constructed child node to the parent - Status AppendChildNode(Trie::Node* parent, uint8_t ch, Trie::Node&& node); - // Create a matching child node from this parent - Status CreateChildNode(Trie::Node* parent, uint8_t ch, std::string_view substring); - Status CreateChildNode(Trie::Node* parent, char ch, std::string_view substring); - - Trie trie_; - - static constexpr auto kMaxIndex = std::numeric_limits::max(); -}; - -} // namespace internal -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/type_fwd.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/type_fwd.h deleted file mode 100644 index 6d904f19..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/type_fwd.h +++ /dev/null @@ -1,69 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
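// [editor's note] Sketch of the fixed-capacity small-string idea the deleted
// trie uses for node substrings: a length byte plus an inline char array,
// keeping each node heap-free and exactly 16 bytes. InlineString and the
// capacity below are assumed names/values for illustration.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string_view>

template <std::size_t N>
struct InlineString {
  std::uint8_t length = 0;
  char data[N];
  InlineString() = default;
  InlineString(std::string_view s) {             // NOLINT implicit, like the original
    assert(s.size() <= N);
    length = static_cast<std::uint8_t>(s.size());
    std::memcpy(data, s.data(), length);
  }
  operator std::string_view() const { return {data, length}; }
};

int main() {
  InlineString<13> s{"trie"};
  std::printf("%zu bytes, \"%.*s\"\n", sizeof(s), (int)s.length, s.data);  // 14 bytes
}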
- -#pragma once - -namespace arrow { - -namespace internal { -struct Empty; -} // namespace internal - -template -class WeakFuture; -class FutureWaiter; - -class TimestampParser; - -namespace internal { - -class Executor; -class TaskGroup; -class ThreadPool; -class CpuInfo; - -namespace tracing { - -struct Scope; - -} // namespace tracing -} // namespace internal - -struct Compression { - /// \brief Compression algorithm - enum type { - UNCOMPRESSED, - SNAPPY, - GZIP, - BROTLI, - ZSTD, - LZ4, - LZ4_FRAME, - LZO, - BZ2, - LZ4_HADOOP - }; -}; - -namespace util { -class AsyncTaskScheduler; -class Compressor; -class Decompressor; -class Codec; -} // namespace util - -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/type_traits.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/type_traits.h deleted file mode 100644 index c1906152..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/type_traits.h +++ /dev/null @@ -1,46 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -namespace arrow { -namespace internal { - -/// \brief Metafunction to allow checking if a type matches any of another set of types -template -struct IsOneOf : std::false_type {}; /// Base case: nothing has matched - -template -struct IsOneOf { - /// Recursive case: T == U or T matches any other types provided (not including U). - static constexpr bool value = std::is_same::value || IsOneOf::value; -}; - -/// \brief Shorthand for using IsOneOf + std::enable_if -template -using EnableIfIsOneOf = typename std::enable_if::value, T>::type; - -/// \brief is_null_pointer from C++17 -template -struct is_null_pointer : std::is_same::type> { -}; - -} // namespace internal -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/ubsan.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/ubsan.h deleted file mode 100644 index 900d8011..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/ubsan.h +++ /dev/null @@ -1,87 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Contains utilities for making UBSan happy. - -#pragma once - -#include -#include -#include - -#include "arrow/util/macros.h" - -namespace arrow { -namespace util { - -namespace internal { - -constexpr uint8_t kNonNullFiller = 0; - -} // namespace internal - -/// \brief Returns maybe_null if not null or a non-null pointer to an arbitrary memory -/// that shouldn't be dereferenced. -/// -/// Memset/Memcpy are undefined when a nullptr is passed as an argument use this utility -/// method to wrap locations where this could happen. -/// -/// Note: Flatbuffers has UBSan warnings if a zero length vector is passed. -/// https://github.com/google/flatbuffers/pull/5355 is trying to resolve -/// them. -template -inline T* MakeNonNull(T* maybe_null = NULLPTR) { - if (ARROW_PREDICT_TRUE(maybe_null != NULLPTR)) { - return maybe_null; - } - - return const_cast(reinterpret_cast(&internal::kNonNullFiller)); -} - -template -inline std::enable_if_t, T> SafeLoadAs( - const uint8_t* unaligned) { - std::remove_const_t ret; - std::memcpy(&ret, unaligned, sizeof(T)); - return ret; -} - -template -inline std::enable_if_t, T> SafeLoad(const T* unaligned) { - std::remove_const_t ret; - std::memcpy(&ret, unaligned, sizeof(T)); - return ret; -} - -template -inline std::enable_if_t && - std::is_trivially_copyable_v && sizeof(T) == sizeof(U), - U> -SafeCopy(T value) { - std::remove_const_t ret; - std::memcpy(&ret, &value, sizeof(T)); - return ret; -} - -template -inline std::enable_if_t, void> SafeStore(void* unaligned, - T value) { - std::memcpy(unaligned, &value, sizeof(T)); -} - -} // namespace util -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/union_util.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/union_util.h deleted file mode 100644 index 0f30d5a3..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/union_util.h +++ /dev/null @@ -1,31 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
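The `SafeLoadAs`/`SafeLoad`/`SafeCopy` helpers deleted above all rely on the same idiom: `std::memcpy` is defined for any alignment and any trivially copyable types, whereas dereferencing a misaligned or type-punned pointer is undefined behavior. A standalone sketch of the idiom (no Arrow dependency):

```cpp
#include <cstdint>
#include <cstring>
#include <iostream>

// memcpy-based unaligned load, mirroring SafeLoadAs above.
template <typename T>
T LoadUnaligned(const uint8_t* p) {
  T value;
  std::memcpy(&value, p, sizeof(T));
  return value;
}

int main() {
  uint8_t buf[5] = {0x00, 0x78, 0x56, 0x34, 0x12};
  // buf + 1 is not 4-byte aligned; *reinterpret_cast<const uint32_t*>(buf + 1)
  // would be UB, but the memcpy-based load is well-defined.
  std::cout << std::hex << LoadUnaligned<uint32_t>(buf + 1) << "\n";  // 12345678 on little-endian

  // SafeCopy-style bit cast: inspect a float's bit pattern without UB.
  float f = 1.0f;
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof bits);
  std::cout << std::hex << bits << "\n";  // 3f800000
  return 0;
}
```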
- -#include -#include "arrow/array/data.h" - -namespace arrow { -namespace union_util { - -/// \brief Compute the number of of logical nulls in a sparse union array -int64_t LogicalSparseUnionNullCount(const ArraySpan& span); - -/// \brief Compute the number of of logical nulls in a dense union array -int64_t LogicalDenseUnionNullCount(const ArraySpan& span); - -} // namespace union_util -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/unreachable.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/unreachable.h deleted file mode 100644 index d2e383e7..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/unreachable.h +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "arrow/util/visibility.h" - -#include - -namespace arrow { - -[[noreturn]] ARROW_EXPORT void Unreachable(const char* message = "Unreachable"); - -[[noreturn]] ARROW_EXPORT void Unreachable(std::string_view message); - -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/uri.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/uri.h deleted file mode 100644 index 855a6140..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/uri.h +++ /dev/null @@ -1,118 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "arrow/type_fwd.h" -#include "arrow/util/visibility.h" - -namespace arrow { -namespace internal { - -/// \brief A parsed URI -class ARROW_EXPORT Uri { - public: - Uri(); - ~Uri(); - Uri(Uri&&); - Uri& operator=(Uri&&); - - // XXX Should we use std::string_view instead? These functions are - // not performance-critical. - - /// The URI scheme, such as "http", or the empty string if the URI has no - /// explicit scheme. 
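The unreachable.h deleted in this hunk declares a `[[noreturn]]` `Unreachable()` helper. A hedged sketch of its intended use: it documents an impossible branch and, being `[[noreturn]]`, also silences "control reaches end of non-void function" warnings.

```cpp
#include "arrow/util/unreachable.h"

int Sign(int x) {
  if (x > 0) return 1;
  if (x < 0) return -1;
  if (x == 0) return 0;
  // Exhaustive comparison above; aborts with the message if ever reached.
  arrow::Unreachable("Sign: integers are totally ordered");
}
```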
- std::string scheme() const; - - /// Convenience function that returns true if the scheme() is "file" - bool is_file_scheme() const; - - /// Whether the URI has an explicit host name. This may return true if - /// the URI has an empty host (e.g. "file:///tmp/foo"), while it returns - /// false is the URI has not host component at all (e.g. "file:/tmp/foo"). - bool has_host() const; - /// The URI host name, such as "localhost", "127.0.0.1" or "::1", or the empty - /// string is the URI does not have a host component. - std::string host() const; - - /// The URI port number, as a string such as "80", or the empty string is the URI - /// does not have a port number component. - std::string port_text() const; - /// The URI port parsed as an integer, or -1 if the URI does not have a port - /// number component. - int32_t port() const; - - /// The username specified in the URI. - std::string username() const; - /// The password specified in the URI. - std::string password() const; - - /// The URI path component. - std::string path() const; - - /// The URI query string - std::string query_string() const; - - /// The URI query items - /// - /// Note this API doesn't allow differentiating between an empty value - /// and a missing value, such in "a&b=1" vs. "a=&b=1". - Result>> query_items() const; - - /// Get the string representation of this URI. - const std::string& ToString() const; - - /// Factory function to parse a URI from its string representation. - Status Parse(const std::string& uri_string); - - private: - struct Impl; - std::unique_ptr impl_; -}; - -/// Percent-encode the input string, for use e.g. as a URI query parameter. -/// -/// This will escape directory separators, making this function unsuitable -/// for encoding URI paths directly. See UriFromAbsolutePath() instead. -ARROW_EXPORT -std::string UriEscape(std::string_view s); - -ARROW_EXPORT -std::string UriUnescape(std::string_view s); - -/// Encode a host for use within a URI, such as "localhost", -/// "127.0.0.1", or "[::1]". -ARROW_EXPORT -std::string UriEncodeHost(std::string_view host); - -/// Whether the string is a syntactically valid URI scheme according to RFC 3986. -ARROW_EXPORT -bool IsValidUriScheme(std::string_view s); - -/// Create a file uri from a given absolute path -ARROW_EXPORT -Result UriFromAbsolutePath(std::string_view path); - -} // namespace internal -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/utf8.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/utf8.h deleted file mode 100644 index ca93fab5..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/utf8.h +++ /dev/null @@ -1,59 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
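A hedged usage sketch of the `Uri` API declared in the uri.h deletion above, using only the accessors visible in this hunk:

```cpp
#include <iostream>
#include "arrow/util/uri.h"

int main() {
  arrow::internal::Uri uri;
  auto st = uri.Parse("s3://localhost:9000/bucket/key?region=us-east-1");
  if (!st.ok()) {
    std::cerr << st.ToString() << std::endl;
    return 1;
  }
  std::cout << uri.scheme() << "\n"         // "s3"
            << uri.host() << "\n"           // "localhost"
            << uri.port() << "\n"           // 9000
            << uri.path() << "\n"           // "/bucket/key"
            << uri.query_string() << "\n";  // "region=us-east-1"
  return 0;
}
```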
- -#pragma once - -#include -#include -#include -#include - -#include "arrow/type_fwd.h" -#include "arrow/util/macros.h" -#include "arrow/util/visibility.h" - -namespace arrow { -namespace util { - -// Convert a UTF8 string to a wstring (either UTF16 or UTF32, depending -// on the wchar_t width). -ARROW_EXPORT Result UTF8ToWideString(std::string_view source); - -// Similarly, convert a wstring to a UTF8 string. -ARROW_EXPORT Result WideStringToUTF8(const std::wstring& source); - -// Convert UTF8 string to a UTF16 string. -ARROW_EXPORT Result UTF8StringToUTF16(std::string_view source); - -// Convert UTF16 string to a UTF8 string. -ARROW_EXPORT Result UTF16StringToUTF8(std::u16string_view source); - -// This function needs to be called before doing UTF8 validation. -ARROW_EXPORT void InitializeUTF8(); - -ARROW_EXPORT bool ValidateUTF8(const uint8_t* data, int64_t size); - -ARROW_EXPORT bool ValidateUTF8(std::string_view str); - -// Skip UTF8 byte order mark, if any. -ARROW_EXPORT -Result SkipUTF8BOM(const uint8_t* data, int64_t size); - -static constexpr uint32_t kMaxUnicodeCodepoint = 0x110000; - -} // namespace util -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/value_parsing.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/value_parsing.h deleted file mode 100644 index b3c71184..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/value_parsing.h +++ /dev/null @@ -1,928 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// This is a private header for string-to-number parsing utilities - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "arrow/type.h" -#include "arrow/type_traits.h" -#include "arrow/util/checked_cast.h" -#include "arrow/util/config.h" -#include "arrow/util/macros.h" -#include "arrow/util/time.h" -#include "arrow/util/visibility.h" -#include "arrow/vendored/datetime.h" -#include "arrow/vendored/strptime.h" - -namespace arrow { - -/// \brief A virtual string to timestamp parser -class ARROW_EXPORT TimestampParser { - public: - virtual ~TimestampParser() = default; - - virtual bool operator()(const char* s, size_t length, TimeUnit::type out_unit, - int64_t* out, - bool* out_zone_offset_present = NULLPTR) const = 0; - - virtual const char* kind() const = 0; - - virtual const char* format() const; - - /// \brief Create a TimestampParser that recognizes strptime-like format strings - static std::shared_ptr MakeStrptime(std::string format); - - /// \brief Create a TimestampParser that recognizes (locale-agnostic) ISO8601 - /// timestamps - static std::shared_ptr MakeISO8601(); -}; - -namespace internal { - -/// \brief The entry point for conversion from strings. 
-/// -/// Specializations of StringConverter for `ARROW_TYPE` must define: -/// - A default constructible member type `value_type` which will be yielded on a -/// successful parse. -/// - The static member function `Convert`, callable with signature -/// `(const ARROW_TYPE& t, const char* s, size_t length, value_type* out)`. -/// `Convert` returns truthy for successful parses and assigns the parsed values to -/// `*out`. Parameters required for parsing (for example a timestamp's TimeUnit) -/// are acquired from the type parameter `t`. -template -struct StringConverter; - -template -struct is_parseable { - template ::value_type> - static std::true_type Test(U*); - - template - static std::false_type Test(...); - - static constexpr bool value = decltype(Test(NULLPTR))::value; -}; - -template -using enable_if_parseable = enable_if_t::value, R>; - -template <> -struct StringConverter { - using value_type = bool; - - bool Convert(const BooleanType&, const char* s, size_t length, value_type* out) { - if (length == 1) { - // "0" or "1"? - if (s[0] == '0') { - *out = false; - return true; - } - if (s[0] == '1') { - *out = true; - return true; - } - return false; - } - if (length == 4) { - // "true"? - *out = true; - return ((s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') && - (s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E')); - } - if (length == 5) { - // "false"? - *out = false; - return ((s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') && - (s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') && - (s[4] == 'e' || s[4] == 'E')); - } - return false; - } -}; - -// Ideas for faster float parsing: -// - http://rapidjson.org/md_doc_internals.html#ParsingDouble -// - https://github.com/google/double-conversion [used here] -// - https://github.com/achan001/dtoa-fast - -ARROW_EXPORT -bool StringToFloat(const char* s, size_t length, char decimal_point, float* out); - -ARROW_EXPORT -bool StringToFloat(const char* s, size_t length, char decimal_point, double* out); - -template <> -struct StringConverter { - using value_type = float; - - explicit StringConverter(char decimal_point = '.') : decimal_point(decimal_point) {} - - bool Convert(const FloatType&, const char* s, size_t length, value_type* out) { - return ARROW_PREDICT_TRUE(StringToFloat(s, length, decimal_point, out)); - } - - private: - const char decimal_point; -}; - -template <> -struct StringConverter { - using value_type = double; - - explicit StringConverter(char decimal_point = '.') : decimal_point(decimal_point) {} - - bool Convert(const DoubleType&, const char* s, size_t length, value_type* out) { - return ARROW_PREDICT_TRUE(StringToFloat(s, length, decimal_point, out)); - } - - private: - const char decimal_point; -}; - -// NOTE: HalfFloatType would require a half<->float conversion library - -inline uint8_t ParseDecimalDigit(char c) { return static_cast(c - '0'); } - -#define PARSE_UNSIGNED_ITERATION(C_TYPE) \ - if (length > 0) { \ - uint8_t digit = ParseDecimalDigit(*s++); \ - result = static_cast(result * 10U); \ - length--; \ - if (ARROW_PREDICT_FALSE(digit > 9U)) { \ - /* Non-digit */ \ - return false; \ - } \ - result = static_cast(result + digit); \ - } else { \ - break; \ - } - -#define PARSE_UNSIGNED_ITERATION_LAST(C_TYPE) \ - if (length > 0) { \ - if (ARROW_PREDICT_FALSE(result > std::numeric_limits::max() / 10U)) { \ - /* Overflow */ \ - return false; \ - } \ - uint8_t digit = ParseDecimalDigit(*s++); \ - result = static_cast(result * 10U); \ - C_TYPE new_result = static_cast(result + 
digit); \ - if (ARROW_PREDICT_FALSE(--length > 0)) { \ - /* Too many digits */ \ - return false; \ - } \ - if (ARROW_PREDICT_FALSE(digit > 9U)) { \ - /* Non-digit */ \ - return false; \ - } \ - if (ARROW_PREDICT_FALSE(new_result < result)) { \ - /* Overflow */ \ - return false; \ - } \ - result = new_result; \ - } - -inline bool ParseUnsigned(const char* s, size_t length, uint8_t* out) { - uint8_t result = 0; - - do { - PARSE_UNSIGNED_ITERATION(uint8_t); - PARSE_UNSIGNED_ITERATION(uint8_t); - PARSE_UNSIGNED_ITERATION_LAST(uint8_t); - } while (false); - *out = result; - return true; -} - -inline bool ParseUnsigned(const char* s, size_t length, uint16_t* out) { - uint16_t result = 0; - do { - PARSE_UNSIGNED_ITERATION(uint16_t); - PARSE_UNSIGNED_ITERATION(uint16_t); - PARSE_UNSIGNED_ITERATION(uint16_t); - PARSE_UNSIGNED_ITERATION(uint16_t); - PARSE_UNSIGNED_ITERATION_LAST(uint16_t); - } while (false); - *out = result; - return true; -} - -inline bool ParseUnsigned(const char* s, size_t length, uint32_t* out) { - uint32_t result = 0; - do { - PARSE_UNSIGNED_ITERATION(uint32_t); - PARSE_UNSIGNED_ITERATION(uint32_t); - PARSE_UNSIGNED_ITERATION(uint32_t); - PARSE_UNSIGNED_ITERATION(uint32_t); - PARSE_UNSIGNED_ITERATION(uint32_t); - - PARSE_UNSIGNED_ITERATION(uint32_t); - PARSE_UNSIGNED_ITERATION(uint32_t); - PARSE_UNSIGNED_ITERATION(uint32_t); - PARSE_UNSIGNED_ITERATION(uint32_t); - - PARSE_UNSIGNED_ITERATION_LAST(uint32_t); - } while (false); - *out = result; - return true; -} - -inline bool ParseUnsigned(const char* s, size_t length, uint64_t* out) { - uint64_t result = 0; - do { - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - PARSE_UNSIGNED_ITERATION(uint64_t); - - PARSE_UNSIGNED_ITERATION_LAST(uint64_t); - } while (false); - *out = result; - return true; -} - -#undef PARSE_UNSIGNED_ITERATION -#undef PARSE_UNSIGNED_ITERATION_LAST - -template -bool ParseHex(const char* s, size_t length, T* out) { - // lets make sure that the length of the string is not too big - if (!ARROW_PREDICT_TRUE(sizeof(T) * 2 >= length && length > 0)) { - return false; - } - T result = 0; - for (size_t i = 0; i < length; i++) { - result = static_cast(result << 4); - if (s[i] >= '0' && s[i] <= '9') { - result = static_cast(result | (s[i] - '0')); - } else if (s[i] >= 'A' && s[i] <= 'F') { - result = static_cast(result | (s[i] - 'A' + 10)); - } else if (s[i] >= 'a' && s[i] <= 'f') { - result = static_cast(result | (s[i] - 'a' + 10)); - } else { - /* Non-digit */ - return false; - } - } - *out = result; - return true; -} - -template -struct StringToUnsignedIntConverterMixin { - using value_type = typename ARROW_TYPE::c_type; - - bool Convert(const ARROW_TYPE&, const char* s, size_t length, value_type* out) { - if (ARROW_PREDICT_FALSE(length == 0)) { - return false; - } - // If it starts with 0x then its hex - if (length > 2 && s[0] == '0' && ((s[1] == 'x') || (s[1] == 'X'))) { - length -= 2; 
- s += 2; - - return ARROW_PREDICT_TRUE(ParseHex(s, length, out)); - } - // Skip leading zeros - while (length > 0 && *s == '0') { - length--; - s++; - } - return ParseUnsigned(s, length, out); - } -}; - -template <> -struct StringConverter : public StringToUnsignedIntConverterMixin { - using StringToUnsignedIntConverterMixin::StringToUnsignedIntConverterMixin; -}; - -template <> -struct StringConverter - : public StringToUnsignedIntConverterMixin { - using StringToUnsignedIntConverterMixin::StringToUnsignedIntConverterMixin; -}; - -template <> -struct StringConverter - : public StringToUnsignedIntConverterMixin { - using StringToUnsignedIntConverterMixin::StringToUnsignedIntConverterMixin; -}; - -template <> -struct StringConverter - : public StringToUnsignedIntConverterMixin { - using StringToUnsignedIntConverterMixin::StringToUnsignedIntConverterMixin; -}; - -template -struct StringToSignedIntConverterMixin { - using value_type = typename ARROW_TYPE::c_type; - using unsigned_type = typename std::make_unsigned::type; - - bool Convert(const ARROW_TYPE&, const char* s, size_t length, value_type* out) { - static constexpr auto max_positive = - static_cast(std::numeric_limits::max()); - // Assuming two's complement - static constexpr unsigned_type max_negative = max_positive + 1; - bool negative = false; - unsigned_type unsigned_value = 0; - - if (ARROW_PREDICT_FALSE(length == 0)) { - return false; - } - // If it starts with 0x then its hex - if (length > 2 && s[0] == '0' && ((s[1] == 'x') || (s[1] == 'X'))) { - length -= 2; - s += 2; - - if (!ARROW_PREDICT_TRUE(ParseHex(s, length, &unsigned_value))) { - return false; - } - *out = static_cast(unsigned_value); - return true; - } - - if (*s == '-') { - negative = true; - s++; - if (--length == 0) { - return false; - } - } - // Skip leading zeros - while (length > 0 && *s == '0') { - length--; - s++; - } - if (!ARROW_PREDICT_TRUE(ParseUnsigned(s, length, &unsigned_value))) { - return false; - } - if (negative) { - if (ARROW_PREDICT_FALSE(unsigned_value > max_negative)) { - return false; - } - // To avoid both compiler warnings (with unsigned negation) - // and undefined behaviour (with signed negation overflow), - // use the expanded formula for 2's complement negation. 
- *out = static_cast(~unsigned_value + 1); - } else { - if (ARROW_PREDICT_FALSE(unsigned_value > max_positive)) { - return false; - } - *out = static_cast(unsigned_value); - } - return true; - } -}; - -template <> -struct StringConverter : public StringToSignedIntConverterMixin { - using StringToSignedIntConverterMixin::StringToSignedIntConverterMixin; -}; - -template <> -struct StringConverter : public StringToSignedIntConverterMixin { - using StringToSignedIntConverterMixin::StringToSignedIntConverterMixin; -}; - -template <> -struct StringConverter : public StringToSignedIntConverterMixin { - using StringToSignedIntConverterMixin::StringToSignedIntConverterMixin; -}; - -template <> -struct StringConverter : public StringToSignedIntConverterMixin { - using StringToSignedIntConverterMixin::StringToSignedIntConverterMixin; -}; - -namespace detail { - -// Inline-able ISO-8601 parser - -using ts_type = TimestampType::c_type; - -template -static inline bool ParseHH(const char* s, Duration* out) { - uint8_t hours = 0; - if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) { - return false; - } - if (ARROW_PREDICT_FALSE(hours >= 24)) { - return false; - } - *out = std::chrono::duration_cast(std::chrono::hours(hours)); - return true; -} - -template -static inline bool ParseHH_MM(const char* s, Duration* out) { - uint8_t hours = 0; - uint8_t minutes = 0; - if (ARROW_PREDICT_FALSE(s[2] != ':')) { - return false; - } - if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) { - return false; - } - if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 3, 2, &minutes))) { - return false; - } - if (ARROW_PREDICT_FALSE(hours >= 24)) { - return false; - } - if (ARROW_PREDICT_FALSE(minutes >= 60)) { - return false; - } - *out = std::chrono::duration_cast(std::chrono::hours(hours) + - std::chrono::minutes(minutes)); - return true; -} - -template -static inline bool ParseHHMM(const char* s, Duration* out) { - uint8_t hours = 0; - uint8_t minutes = 0; - if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) { - return false; - } - if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 2, 2, &minutes))) { - return false; - } - if (ARROW_PREDICT_FALSE(hours >= 24)) { - return false; - } - if (ARROW_PREDICT_FALSE(minutes >= 60)) { - return false; - } - *out = std::chrono::duration_cast(std::chrono::hours(hours) + - std::chrono::minutes(minutes)); - return true; -} - -template -static inline bool ParseHH_MM_SS(const char* s, Duration* out) { - uint8_t hours = 0; - uint8_t minutes = 0; - uint8_t seconds = 0; - if (ARROW_PREDICT_FALSE(s[2] != ':') || ARROW_PREDICT_FALSE(s[5] != ':')) { - return false; - } - if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) { - return false; - } - if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 3, 2, &minutes))) { - return false; - } - if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 6, 2, &seconds))) { - return false; - } - if (ARROW_PREDICT_FALSE(hours >= 24)) { - return false; - } - if (ARROW_PREDICT_FALSE(minutes >= 60)) { - return false; - } - if (ARROW_PREDICT_FALSE(seconds >= 60)) { - return false; - } - *out = std::chrono::duration_cast(std::chrono::hours(hours) + - std::chrono::minutes(minutes) + - std::chrono::seconds(seconds)); - return true; -} - -static inline bool ParseSubSeconds(const char* s, size_t length, TimeUnit::type unit, - uint32_t* out) { - // The decimal point has been peeled off at this point - - // Fail if number of decimal places provided exceeds what the unit can hold. - // Calculate how many trailing decimal places are omitted for the unit - // e.g. 
if 4 decimal places are provided and unit is MICRO, 2 are missing - size_t omitted = 0; - switch (unit) { - case TimeUnit::MILLI: - if (ARROW_PREDICT_FALSE(length > 3)) { - return false; - } - if (length < 3) { - omitted = 3 - length; - } - break; - case TimeUnit::MICRO: - if (ARROW_PREDICT_FALSE(length > 6)) { - return false; - } - if (length < 6) { - omitted = 6 - length; - } - break; - case TimeUnit::NANO: - if (ARROW_PREDICT_FALSE(length > 9)) { - return false; - } - if (length < 9) { - omitted = 9 - length; - } - break; - default: - return false; - } - - if (ARROW_PREDICT_TRUE(omitted == 0)) { - return ParseUnsigned(s, length, out); - } else { - uint32_t subseconds = 0; - bool success = ParseUnsigned(s, length, &subseconds); - if (ARROW_PREDICT_TRUE(success)) { - switch (omitted) { - case 1: - *out = subseconds * 10; - break; - case 2: - *out = subseconds * 100; - break; - case 3: - *out = subseconds * 1000; - break; - case 4: - *out = subseconds * 10000; - break; - case 5: - *out = subseconds * 100000; - break; - case 6: - *out = subseconds * 1000000; - break; - case 7: - *out = subseconds * 10000000; - break; - case 8: - *out = subseconds * 100000000; - break; - default: - // Impossible case - break; - } - return true; - } else { - return false; - } - } -} - -} // namespace detail - -template -static inline bool ParseYYYY_MM_DD(const char* s, Duration* since_epoch) { - uint16_t year = 0; - uint8_t month = 0; - uint8_t day = 0; - if (ARROW_PREDICT_FALSE(s[4] != '-') || ARROW_PREDICT_FALSE(s[7] != '-')) { - return false; - } - if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 4, &year))) { - return false; - } - if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 5, 2, &month))) { - return false; - } - if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 8, 2, &day))) { - return false; - } - arrow_vendored::date::year_month_day ymd{arrow_vendored::date::year{year}, - arrow_vendored::date::month{month}, - arrow_vendored::date::day{day}}; - if (ARROW_PREDICT_FALSE(!ymd.ok())) return false; - - *since_epoch = std::chrono::duration_cast( - arrow_vendored::date::sys_days{ymd}.time_since_epoch()); - return true; -} - -static inline bool ParseTimestampISO8601(const char* s, size_t length, - TimeUnit::type unit, TimestampType::c_type* out, - bool* out_zone_offset_present = NULLPTR) { - using seconds_type = std::chrono::duration; - - // We allow the following zone offset formats: - // - (none) - // - Z - // - [+-]HH(:?MM)? - // - // We allow the following formats for all units: - // - "YYYY-MM-DD" - // - "YYYY-MM-DD[ T]hhZ?" - // - "YYYY-MM-DD[ T]hh:mmZ?" - // - "YYYY-MM-DD[ T]hh:mm:ssZ?" - // - // We allow the following formats for unit == MILLI, MICRO, or NANO: - // - "YYYY-MM-DD[ T]hh:mm:ss.s{1,3}Z?" - // - // We allow the following formats for unit == MICRO, or NANO: - // - "YYYY-MM-DD[ T]hh:mm:ss.s{4,6}Z?" - // - // We allow the following formats for unit == NANO: - // - "YYYY-MM-DD[ T]hh:mm:ss.s{7,9}Z?" - // - // UTC is always assumed, and the DataType's timezone is ignored. 
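Before the implementation continues, a hedged sketch of calling `ParseTimestampISO8601` on the formats enumerated in the comment above (the function and its optional zone-offset out-parameter are declared in this hunk):

```cpp
#include <cstdint>
#include <iostream>
#include "arrow/util/value_parsing.h"

int main() {
  int64_t out = 0;
  bool zone_present = false;

  // 23 characters: date, ' ' or 'T', hh:mm:ss, and 3 subsecond digits (MILLI).
  const char* ts = "2024-05-01 12:34:56.789";
  bool ok = arrow::internal::ParseTimestampISO8601(
      ts, 23, arrow::TimeUnit::MILLI, &out, &zone_present);
  std::cout << ok << " " << out << " " << zone_present << "\n";

  // A bare date is accepted for every unit and maps to midnight UTC.
  ok = arrow::internal::ParseTimestampISO8601("2024-05-01", 10,
                                              arrow::TimeUnit::SECOND, &out);
  std::cout << ok << " " << out << "\n";
  return 0;
}
```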
- // - - if (ARROW_PREDICT_FALSE(length < 10)) return false; - - seconds_type seconds_since_epoch; - if (ARROW_PREDICT_FALSE(!ParseYYYY_MM_DD(s, &seconds_since_epoch))) { - return false; - } - - if (length == 10) { - *out = util::CastSecondsToUnit(unit, seconds_since_epoch.count()); - return true; - } - - if (ARROW_PREDICT_FALSE(s[10] != ' ') && ARROW_PREDICT_FALSE(s[10] != 'T')) { - return false; - } - - if (out_zone_offset_present) { - *out_zone_offset_present = false; - } - - seconds_type zone_offset(0); - if (s[length - 1] == 'Z') { - --length; - if (out_zone_offset_present) *out_zone_offset_present = true; - } else if (s[length - 3] == '+' || s[length - 3] == '-') { - // [+-]HH - length -= 3; - if (ARROW_PREDICT_FALSE(!detail::ParseHH(s + length + 1, &zone_offset))) { - return false; - } - if (s[length] == '+') zone_offset *= -1; - if (out_zone_offset_present) *out_zone_offset_present = true; - } else if (s[length - 5] == '+' || s[length - 5] == '-') { - // [+-]HHMM - length -= 5; - if (ARROW_PREDICT_FALSE(!detail::ParseHHMM(s + length + 1, &zone_offset))) { - return false; - } - if (s[length] == '+') zone_offset *= -1; - if (out_zone_offset_present) *out_zone_offset_present = true; - } else if ((s[length - 6] == '+' || s[length - 6] == '-') && (s[length - 3] == ':')) { - // [+-]HH:MM - length -= 6; - if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM(s + length + 1, &zone_offset))) { - return false; - } - if (s[length] == '+') zone_offset *= -1; - if (out_zone_offset_present) *out_zone_offset_present = true; - } - - seconds_type seconds_since_midnight; - switch (length) { - case 13: // YYYY-MM-DD[ T]hh - if (ARROW_PREDICT_FALSE(!detail::ParseHH(s + 11, &seconds_since_midnight))) { - return false; - } - break; - case 16: // YYYY-MM-DD[ T]hh:mm - if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM(s + 11, &seconds_since_midnight))) { - return false; - } - break; - case 19: // YYYY-MM-DD[ T]hh:mm:ss - case 21: // YYYY-MM-DD[ T]hh:mm:ss.s - case 22: // YYYY-MM-DD[ T]hh:mm:ss.ss - case 23: // YYYY-MM-DD[ T]hh:mm:ss.sss - case 24: // YYYY-MM-DD[ T]hh:mm:ss.ssss - case 25: // YYYY-MM-DD[ T]hh:mm:ss.sssss - case 26: // YYYY-MM-DD[ T]hh:mm:ss.ssssss - case 27: // YYYY-MM-DD[ T]hh:mm:ss.sssssss - case 28: // YYYY-MM-DD[ T]hh:mm:ss.ssssssss - case 29: // YYYY-MM-DD[ T]hh:mm:ss.sssssssss - if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM_SS(s + 11, &seconds_since_midnight))) { - return false; - } - break; - default: - return false; - } - - seconds_since_epoch += seconds_since_midnight; - seconds_since_epoch += zone_offset; - - if (length <= 19) { - *out = util::CastSecondsToUnit(unit, seconds_since_epoch.count()); - return true; - } - - if (ARROW_PREDICT_FALSE(s[19] != '.')) { - return false; - } - - uint32_t subseconds = 0; - if (ARROW_PREDICT_FALSE( - !detail::ParseSubSeconds(s + 20, length - 20, unit, &subseconds))) { - return false; - } - - *out = util::CastSecondsToUnit(unit, seconds_since_epoch.count()) + subseconds; - return true; -} - -#if defined(_WIN32) || defined(ARROW_WITH_MUSL) -static constexpr bool kStrptimeSupportsZone = false; -#else -static constexpr bool kStrptimeSupportsZone = true; -#endif - -/// \brief Returns time since the UNIX epoch in the requested unit -static inline bool ParseTimestampStrptime(const char* buf, size_t length, - const char* format, bool ignore_time_in_day, - bool allow_trailing_chars, TimeUnit::type unit, - int64_t* out) { - // NOTE: strptime() is more than 10x faster than arrow_vendored::date::parse(). 
- // The buffer may not be nul-terminated - std::string clean_copy(buf, length); - struct tm result; - memset(&result, 0, sizeof(struct tm)); -#ifdef _WIN32 - char* ret = arrow_strptime(clean_copy.c_str(), format, &result); -#else - char* ret = strptime(clean_copy.c_str(), format, &result); -#endif - if (ret == NULLPTR) { - return false; - } - if (!allow_trailing_chars && static_cast(ret - clean_copy.c_str()) != length) { - return false; - } - // ignore the time part - arrow_vendored::date::sys_seconds secs = - arrow_vendored::date::sys_days(arrow_vendored::date::year(result.tm_year + 1900) / - (result.tm_mon + 1) / std::max(result.tm_mday, 1)); - if (!ignore_time_in_day) { - secs += (std::chrono::hours(result.tm_hour) + std::chrono::minutes(result.tm_min) + - std::chrono::seconds(result.tm_sec)); -#ifndef _WIN32 - secs -= std::chrono::seconds(result.tm_gmtoff); -#endif - } - *out = util::CastSecondsToUnit(unit, secs.time_since_epoch().count()); - return true; -} - -template <> -struct StringConverter { - using value_type = int64_t; - - bool Convert(const TimestampType& type, const char* s, size_t length, value_type* out) { - return ParseTimestampISO8601(s, length, type.unit(), out); - } -}; - -template <> -struct StringConverter - : public StringToSignedIntConverterMixin { - using StringToSignedIntConverterMixin::StringToSignedIntConverterMixin; -}; - -template -struct StringConverter> { - using value_type = typename DATE_TYPE::c_type; - - using duration_type = - typename std::conditional::value, - arrow_vendored::date::days, - std::chrono::milliseconds>::type; - - bool Convert(const DATE_TYPE& type, const char* s, size_t length, value_type* out) { - if (ARROW_PREDICT_FALSE(length != 10)) { - return false; - } - - duration_type since_epoch; - if (ARROW_PREDICT_FALSE(!ParseYYYY_MM_DD(s, &since_epoch))) { - return false; - } - - *out = static_cast(since_epoch.count()); - return true; - } -}; - -template -struct StringConverter> { - using value_type = typename TIME_TYPE::c_type; - - // We allow the following formats for all units: - // - "hh:mm" - // - "hh:mm:ss" - // - // We allow the following formats for unit == MILLI, MICRO, or NANO: - // - "hh:mm:ss.s{1,3}" - // - // We allow the following formats for unit == MICRO, or NANO: - // - "hh:mm:ss.s{4,6}" - // - // We allow the following formats for unit == NANO: - // - "hh:mm:ss.s{7,9}" - - bool Convert(const TIME_TYPE& type, const char* s, size_t length, value_type* out) { - const auto unit = type.unit(); - std::chrono::seconds since_midnight; - - if (length == 5) { - if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM(s, &since_midnight))) { - return false; - } - *out = - static_cast(util::CastSecondsToUnit(unit, since_midnight.count())); - return true; - } - - if (ARROW_PREDICT_FALSE(length < 8)) { - return false; - } - if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM_SS(s, &since_midnight))) { - return false; - } - - *out = static_cast(util::CastSecondsToUnit(unit, since_midnight.count())); - - if (length == 8) { - return true; - } - - if (ARROW_PREDICT_FALSE(s[8] != '.')) { - return false; - } - - uint32_t subseconds_count = 0; - if (ARROW_PREDICT_FALSE( - !detail::ParseSubSeconds(s + 9, length - 9, unit, &subseconds_count))) { - return false; - } - - *out += subseconds_count; - return true; - } -}; - -/// \brief Convenience wrappers around internal::StringConverter. 
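A hedged sketch of the `ParseValue` convenience wrappers declared just below: parameter-free types can use the static-type overload, while parameterized types (such as timestamps, whose unit lives on the type instance) pass the type explicitly.

```cpp
#include <cstdint>
#include <iostream>
#include "arrow/type.h"
#include "arrow/util/value_parsing.h"

int main() {
  // Parameter-free type: the overload default-constructs the type object.
  int32_t i = 0;
  bool ok1 = arrow::internal::ParseValue<arrow::Int32Type>("-1234", 5, &i);

  // Parameterized type: the unit is read from the passed type instance.
  arrow::TimestampType ts_type(arrow::TimeUnit::MICRO);
  int64_t ts = 0;
  bool ok2 = arrow::internal::ParseValue(ts_type, "2024-05-01T00:00:00Z", 20, &ts);

  std::cout << ok1 << " " << i << " " << ok2 << " " << ts << "\n";
  return 0;
}
```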
-template -bool ParseValue(const T& type, const char* s, size_t length, - typename StringConverter::value_type* out) { - return StringConverter{}.Convert(type, s, length, out); -} - -template -enable_if_parameter_free ParseValue( - const char* s, size_t length, typename StringConverter::value_type* out) { - static T type; - return StringConverter{}.Convert(type, s, length, out); -} - -} // namespace internal -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/vector.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/vector.h deleted file mode 100644 index e3c0a67c..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/vector.h +++ /dev/null @@ -1,172 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include - -#include "arrow/result.h" -#include "arrow/util/algorithm.h" -#include "arrow/util/functional.h" -#include "arrow/util/logging.h" - -namespace arrow { -namespace internal { - -template -std::vector DeleteVectorElement(const std::vector& values, size_t index) { - DCHECK(!values.empty()); - DCHECK_LT(index, values.size()); - std::vector out; - out.reserve(values.size() - 1); - for (size_t i = 0; i < index; ++i) { - out.push_back(values[i]); - } - for (size_t i = index + 1; i < values.size(); ++i) { - out.push_back(values[i]); - } - return out; -} - -template -std::vector AddVectorElement(const std::vector& values, size_t index, - T new_element) { - DCHECK_LE(index, values.size()); - std::vector out; - out.reserve(values.size() + 1); - for (size_t i = 0; i < index; ++i) { - out.push_back(values[i]); - } - out.emplace_back(std::move(new_element)); - for (size_t i = index; i < values.size(); ++i) { - out.push_back(values[i]); - } - return out; -} - -template -std::vector ReplaceVectorElement(const std::vector& values, size_t index, - T new_element) { - DCHECK_LE(index, values.size()); - std::vector out; - out.reserve(values.size()); - for (size_t i = 0; i < index; ++i) { - out.push_back(values[i]); - } - out.emplace_back(std::move(new_element)); - for (size_t i = index + 1; i < values.size(); ++i) { - out.push_back(values[i]); - } - return out; -} - -template -std::vector FilterVector(std::vector values, Predicate&& predicate) { - auto new_end = std::remove_if(values.begin(), values.end(), - [&](const T& value) { return !predicate(value); }); - values.erase(new_end, values.end()); - return values; -} - -template ()(std::declval()))> -std::vector MapVector(Fn&& map, const std::vector& source) { - std::vector out; - out.reserve(source.size()); - std::transform(source.begin(), source.end(), std::back_inserter(out), - std::forward(map)); - return out; -} - -template ()(std::declval()))> -std::vector MapVector(Fn&& map, 
std::vector&& source) { - std::vector out; - out.reserve(source.size()); - std::transform(std::make_move_iterator(source.begin()), - std::make_move_iterator(source.end()), std::back_inserter(out), - std::forward(map)); - return out; -} - -/// \brief Like MapVector, but where the function can fail. -template , - typename To = typename internal::call_traits::return_type::ValueType> -Result> MaybeMapVector(Fn&& map, const std::vector& source) { - std::vector out; - out.reserve(source.size()); - ARROW_RETURN_NOT_OK(MaybeTransform(source.begin(), source.end(), - std::back_inserter(out), std::forward(map))); - return std::move(out); -} - -template , - typename To = typename internal::call_traits::return_type::ValueType> -Result> MaybeMapVector(Fn&& map, std::vector&& source) { - std::vector out; - out.reserve(source.size()); - ARROW_RETURN_NOT_OK(MaybeTransform(std::make_move_iterator(source.begin()), - std::make_move_iterator(source.end()), - std::back_inserter(out), std::forward(map))); - return std::move(out); -} - -template -std::vector FlattenVectors(const std::vector>& vecs) { - std::size_t sum = 0; - for (const auto& vec : vecs) { - sum += vec.size(); - } - std::vector out; - out.reserve(sum); - for (const auto& vec : vecs) { - out.insert(out.end(), vec.begin(), vec.end()); - } - return out; -} - -template -Result> UnwrapOrRaise(std::vector>&& results) { - std::vector out; - out.reserve(results.size()); - auto end = std::make_move_iterator(results.end()); - for (auto it = std::make_move_iterator(results.begin()); it != end; it++) { - if (!it->ok()) { - return it->status(); - } - out.push_back(it->MoveValueUnsafe()); - } - return std::move(out); -} - -template -Result> UnwrapOrRaise(const std::vector>& results) { - std::vector out; - out.reserve(results.size()); - for (const auto& result : results) { - if (!result.ok()) { - return result.status(); - } - out.push_back(result.ValueUnsafe()); - } - return std::move(out); -} - -} // namespace internal -} // namespace arrow diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/visibility.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/visibility.h deleted file mode 100644 index b0fd7902..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/visibility.h +++ /dev/null @@ -1,83 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
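The vector.h deletion completed above provides small functional helpers over `std::vector`. A hedged usage sketch of `MapVector` (element-wise transform into a new vector) and `FilterVector` (keep elements matching a predicate), per the signatures shown:

```cpp
#include <iostream>
#include <string>
#include <vector>
#include "arrow/util/vector.h"

int main() {
  std::vector<int> xs = {1, 2, 3};
  // MapVector deduces the result element type from the callable.
  auto strs = arrow::internal::MapVector(
      [](int x) { return std::to_string(x * 10); }, xs);

  // FilterVector takes the vector by value and prunes non-matching elements.
  auto odd = arrow::internal::FilterVector(
      std::vector<int>{1, 2, 3, 4, 5}, [](int x) { return x % 2 == 1; });

  for (const auto& s : strs) std::cout << s << " ";  // 10 20 30
  std::cout << "\n";
  for (int x : odd) std::cout << x << " ";           // 1 3 5
  std::cout << "\n";
  return 0;
}
```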
- -#pragma once - -#if defined(_WIN32) || defined(__CYGWIN__) -// Windows - -#if defined(_MSC_VER) -#pragma warning(disable : 4251) -#else -#pragma GCC diagnostic ignored "-Wattributes" -#endif - -#if defined(__cplusplus) && defined(__GNUC__) && !defined(__clang__) -// Use C++ attribute syntax where possible to avoid GCC parser bug -// (https://stackoverflow.com/questions/57993818/gcc-how-to-combine-attribute-dllexport-and-nodiscard-in-a-struct-de) -#define ARROW_DLLEXPORT [[gnu::dllexport]] -#define ARROW_DLLIMPORT [[gnu::dllimport]] -#else -#define ARROW_DLLEXPORT __declspec(dllexport) -#define ARROW_DLLIMPORT __declspec(dllimport) -#endif - -#ifdef ARROW_STATIC -#define ARROW_EXPORT -#define ARROW_FRIEND_EXPORT -#define ARROW_TEMPLATE_EXPORT -#elif defined(ARROW_EXPORTING) -#define ARROW_EXPORT ARROW_DLLEXPORT -// For some reason [[gnu::dllexport]] doesn't work well with friend declarations -#define ARROW_FRIEND_EXPORT __declspec(dllexport) -#define ARROW_TEMPLATE_EXPORT ARROW_DLLEXPORT -#else -#define ARROW_EXPORT ARROW_DLLIMPORT -#define ARROW_FRIEND_EXPORT __declspec(dllimport) -#define ARROW_TEMPLATE_EXPORT ARROW_DLLIMPORT -#endif - -#define ARROW_NO_EXPORT -#define ARROW_FORCE_INLINE __forceinline - -#else - -// Non-Windows - -#define ARROW_FORCE_INLINE - -#if defined(__cplusplus) && (defined(__GNUC__) || defined(__clang__)) -#ifndef ARROW_EXPORT -#define ARROW_EXPORT [[gnu::visibility("default")]] -#endif -#ifndef ARROW_NO_EXPORT -#define ARROW_NO_EXPORT [[gnu::visibility("hidden")]] -#endif -#else -// Not C++, or not gcc/clang -#ifndef ARROW_EXPORT -#define ARROW_EXPORT -#endif -#ifndef ARROW_NO_EXPORT -#define ARROW_NO_EXPORT -#endif -#endif - -#define ARROW_FRIEND_EXPORT -#define ARROW_TEMPLATE_EXPORT - -#endif // Non-Windows diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/windows_compatibility.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/windows_compatibility.h deleted file mode 100644 index ea0d0167..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/windows_compatibility.h +++ /dev/null @@ -1,40 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
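The visibility.h deletion above defines the symbol-visibility macros: on non-Windows gcc/clang builds `ARROW_EXPORT` expands to `[[gnu::visibility("default")]]` and `ARROW_NO_EXPORT` to `[[gnu::visibility("hidden")]]`, while on Windows `ARROW_EXPORT` toggles between `dllexport` and `dllimport` depending on `ARROW_EXPORTING`/`ARROW_STATIC`. A hedged sketch of how a shared library annotates its ABI with them (`mylib` is a hypothetical consumer namespace):

```cpp
#include "arrow/util/visibility.h"

namespace mylib {

// Visible across the shared-library boundary.
class ARROW_EXPORT PublicThing {
 public:
  int Value() const;
};

// Hidden: compiled in, but not part of the exported ABI.
class ARROW_NO_EXPORT InternalDetail {
 public:
  void Helper();
};

}  // namespace mylib
```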
- -#ifdef _WIN32 - -// Windows defines min and max macros that mess up std::min/max -#ifndef NOMINMAX -#define NOMINMAX -#endif - -#define WIN32_LEAN_AND_MEAN - -// Set Windows 7 as a conservative minimum for Apache Arrow -#if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x601 -#undef _WIN32_WINNT -#endif -#ifndef _WIN32_WINNT -#define _WIN32_WINNT 0x601 -#endif - -#include -#include - -#include "arrow/util/windows_fixup.h" - -#endif // _WIN32 diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/windows_fixup.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/windows_fixup.h deleted file mode 100644 index 2949ac4a..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/util/windows_fixup.h +++ /dev/null @@ -1,52 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// This header needs to be included multiple times. - -#ifdef _WIN32 - -#ifdef max -#undef max -#endif -#ifdef min -#undef min -#endif - -// The Windows API defines macros from *File resolving to either -// *FileA or *FileW. Need to undo them. -#ifdef CopyFile -#undef CopyFile -#endif -#ifdef CreateFile -#undef CreateFile -#endif -#ifdef DeleteFile -#undef DeleteFile -#endif - -// Other annoying Windows macro definitions... -#ifdef IN -#undef IN -#endif -#ifdef OUT -#undef OUT -#endif - -// Note that we can't undefine OPTIONAL, because it can be used in other -// Windows headers... - -#endif // _WIN32 diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/vendored/portable-snippets/debug-trap.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/vendored/portable-snippets/debug-trap.h deleted file mode 100644 index 6d039064..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/vendored/portable-snippets/debug-trap.h +++ /dev/null @@ -1,83 +0,0 @@ -/* Debugging assertions and traps - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - */ - -#if !defined(PSNIP_DEBUG_TRAP_H) -#define PSNIP_DEBUG_TRAP_H - -#if !defined(PSNIP_NDEBUG) && defined(NDEBUG) && !defined(PSNIP_DEBUG) -# define PSNIP_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -# if __has_builtin(__builtin_debugtrap) -# define psnip_trap() __builtin_debugtrap() -# elif __has_builtin(__debugbreak) -# define psnip_trap() __debugbreak() -# endif -#endif -#if !defined(psnip_trap) -# if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define psnip_trap() __debugbreak() -# elif defined(__ARMCC_VERSION) -# define psnip_trap() __breakpoint(42) -# elif defined(__ibmxl__) || defined(__xlC__) -# include -# define psnip_trap() __trap(42) -# elif defined(__DMC__) && defined(_M_IX86) - static inline void psnip_trap(void) { __asm int 3h; } -# elif defined(__i386__) || defined(__x86_64__) - static inline void psnip_trap(void) { __asm__ __volatile__("int $03"); } -# elif defined(__thumb__) - static inline void psnip_trap(void) { __asm__ __volatile__(".inst 0xde01"); } -# elif defined(__aarch64__) - static inline void psnip_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } -# elif defined(__arm__) - static inline void psnip_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } -# elif defined (__alpha__) && !defined(__osf__) - static inline void psnip_trap(void) { __asm__ __volatile__("bpt"); } -# elif defined(_54_) - static inline void psnip_trap(void) { __asm__ __volatile__("ESTOP"); } -# elif defined(_55_) - static inline void psnip_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } -# elif defined(_64P_) - static inline void psnip_trap(void) { __asm__ __volatile__("SWBP 0"); } -# elif defined(_6x_) - static inline void psnip_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } -# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -# define psnip_trap() __builtin_trap() -# else -# include -# if defined(SIGTRAP) -# define psnip_trap() raise(SIGTRAP) -# else -# define psnip_trap() raise(SIGABRT) -# endif -# endif -#endif - -#if defined(HEDLEY_LIKELY) -# define PSNIP_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -# define PSNIP_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -# define PSNIP_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(PSNIP_NDEBUG) || (PSNIP_NDEBUG == 0) -# define psnip_dbg_assert(expr) do { \ - if (!PSNIP_DBG_LIKELY(expr)) { \ - psnip_trap(); \ - } \ - } while (0) -#else -# define psnip_dbg_assert(expr) -#endif - -#endif /* !defined(PSNIP_DEBUG_TRAP_H) */ diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/vendored/portable-snippets/safe-math.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/vendored/portable-snippets/safe-math.h deleted file mode 100644 index 7f6426ac..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/vendored/portable-snippets/safe-math.h +++ /dev/null @@ -1,1072 +0,0 @@ -/* Overflow-safe math functions - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - */ - -#if !defined(PSNIP_SAFE_H) -#define PSNIP_SAFE_H - -#if !defined(PSNIP_SAFE_FORCE_PORTABLE) -# if defined(__has_builtin) -# if __has_builtin(__builtin_add_overflow) && !defined(__ibmxl__) -# define PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW -# endif -# elif defined(__GNUC__) && (__GNUC__ >= 5) && !defined(__INTEL_COMPILER) -# define PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW -# endif -# if defined(__has_include) -# if __has_include() -# define PSNIP_SAFE_HAVE_INTSAFE_H -# endif -# elif defined(_WIN32) -# define PSNIP_SAFE_HAVE_INTSAFE_H -# endif -#endif /* !defined(PSNIP_SAFE_FORCE_PORTABLE) */ - -#if defined(__GNUC__) -# define PSNIP_SAFE_LIKELY(expr) __builtin_expect(!!(expr), 1) -# define PSNIP_SAFE_UNLIKELY(expr) __builtin_expect(!!(expr), 0) -#else -# define PSNIP_SAFE_LIKELY(expr) !!(expr) -# define PSNIP_SAFE_UNLIKELY(expr) !!(expr) -#endif /* defined(__GNUC__) */ - -#if !defined(PSNIP_SAFE_STATIC_INLINE) -# if defined(__GNUC__) -# define PSNIP_SAFE__COMPILER_ATTRIBUTES __attribute__((__unused__)) -# else -# define PSNIP_SAFE__COMPILER_ATTRIBUTES -# endif - -# if defined(HEDLEY_INLINE) -# define PSNIP_SAFE__INLINE HEDLEY_INLINE -# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L -# define PSNIP_SAFE__INLINE inline -# elif defined(__GNUC_STDC_INLINE__) -# define PSNIP_SAFE__INLINE __inline__ -# elif defined(_MSC_VER) && _MSC_VER >= 1200 -# define PSNIP_SAFE__INLINE __inline -# else -# define PSNIP_SAFE__INLINE -# endif - -# define PSNIP_SAFE__FUNCTION PSNIP_SAFE__COMPILER_ATTRIBUTES static PSNIP_SAFE__INLINE -#endif - -// !defined(__cplusplus) added for Solaris support -#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L -# define psnip_safe_bool _Bool -#else -# define psnip_safe_bool int -#endif - -#if !defined(PSNIP_SAFE_NO_FIXED) -/* For maximum portability include the exact-int module from - portable snippets. */ -# if \ - !defined(psnip_int64_t) || !defined(psnip_uint64_t) || \ - !defined(psnip_int32_t) || !defined(psnip_uint32_t) || \ - !defined(psnip_int16_t) || !defined(psnip_uint16_t) || \ - !defined(psnip_int8_t) || !defined(psnip_uint8_t) -# include -# if !defined(psnip_int64_t) -# define psnip_int64_t int64_t -# endif -# if !defined(psnip_uint64_t) -# define psnip_uint64_t uint64_t -# endif -# if !defined(psnip_int32_t) -# define psnip_int32_t int32_t -# endif -# if !defined(psnip_uint32_t) -# define psnip_uint32_t uint32_t -# endif -# if !defined(psnip_int16_t) -# define psnip_int16_t int16_t -# endif -# if !defined(psnip_uint16_t) -# define psnip_uint16_t uint16_t -# endif -# if !defined(psnip_int8_t) -# define psnip_int8_t int8_t -# endif -# if !defined(psnip_uint8_t) -# define psnip_uint8_t uint8_t -# endif -# endif -#endif /* !defined(PSNIP_SAFE_NO_FIXED) */ -#include -#include - -#if !defined(PSNIP_SAFE_SIZE_MAX) -# if defined(__SIZE_MAX__) -# define PSNIP_SAFE_SIZE_MAX __SIZE_MAX__ -# elif defined(PSNIP_EXACT_INT_HAVE_STDINT) -# include -# endif -#endif - -#if defined(PSNIP_SAFE_SIZE_MAX) -# define PSNIP_SAFE__SIZE_MAX_RT PSNIP_SAFE_SIZE_MAX -#else -# define PSNIP_SAFE__SIZE_MAX_RT (~((size_t) 0)) -#endif - -#if defined(PSNIP_SAFE_HAVE_INTSAFE_H) -/* In VS 10, stdint.h and intsafe.h both define (U)INTN_MIN/MAX, which - triggers warning C4005 (level 1). 
*/ -# if defined(_MSC_VER) && (_MSC_VER == 1600) -# pragma warning(push) -# pragma warning(disable:4005) -# endif -# include -# if defined(_MSC_VER) && (_MSC_VER == 1600) -# pragma warning(pop) -# endif -#endif /* defined(PSNIP_SAFE_HAVE_INTSAFE_H) */ - -/* If there is a type larger than the one we're concerned with it's - * likely much faster to simply promote the operands, perform the - * requested operation, verify that the result falls within the - * original type, then cast the result back to the original type. */ - -#if !defined(PSNIP_SAFE_NO_PROMOTIONS) - -#define PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, op_name, op) \ - PSNIP_SAFE__FUNCTION psnip_safe_##name##_larger \ - psnip_safe_larger_##name##_##op_name (T a, T b) { \ - return ((psnip_safe_##name##_larger) a) op ((psnip_safe_##name##_larger) b); \ - } - -#define PSNIP_SAFE_DEFINE_LARGER_UNARY_OP(T, name, op_name, op) \ - PSNIP_SAFE__FUNCTION psnip_safe_##name##_larger \ - psnip_safe_larger_##name##_##op_name (T value) { \ - return (op ((psnip_safe_##name##_larger) value)); \ - } - -#define PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(T, name) \ - PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, add, +) \ - PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, sub, -) \ - PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, mul, *) \ - PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, div, /) \ - PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, mod, %) \ - PSNIP_SAFE_DEFINE_LARGER_UNARY_OP (T, name, neg, -) - -#define PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(T, name) \ - PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, add, +) \ - PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, sub, -) \ - PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, mul, *) \ - PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, div, /) \ - PSNIP_SAFE_DEFINE_LARGER_BINARY_OP(T, name, mod, %) - -#define PSNIP_SAFE_IS_LARGER(ORIG_MAX, DEST_MAX) ((DEST_MAX / ORIG_MAX) >= ORIG_MAX) - -#if defined(__GNUC__) && ((__GNUC__ >= 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__SIZEOF_INT128__) && !defined(__ibmxl__) -#define PSNIP_SAFE_HAVE_128 -typedef __int128 psnip_safe_int128_t; -typedef unsigned __int128 psnip_safe_uint128_t; -#endif /* defined(__GNUC__) */ - -#if !defined(PSNIP_SAFE_NO_FIXED) -#define PSNIP_SAFE_HAVE_INT8_LARGER -#define PSNIP_SAFE_HAVE_UINT8_LARGER -typedef psnip_int16_t psnip_safe_int8_larger; -typedef psnip_uint16_t psnip_safe_uint8_larger; - -#define PSNIP_SAFE_HAVE_INT16_LARGER -typedef psnip_int32_t psnip_safe_int16_larger; -typedef psnip_uint32_t psnip_safe_uint16_larger; - -#define PSNIP_SAFE_HAVE_INT32_LARGER -typedef psnip_int64_t psnip_safe_int32_larger; -typedef psnip_uint64_t psnip_safe_uint32_larger; - -#if defined(PSNIP_SAFE_HAVE_128) -#define PSNIP_SAFE_HAVE_INT64_LARGER -typedef psnip_safe_int128_t psnip_safe_int64_larger; -typedef psnip_safe_uint128_t psnip_safe_uint64_larger; -#endif /* defined(PSNIP_SAFE_HAVE_128) */ -#endif /* !defined(PSNIP_SAFE_NO_FIXED) */ - -#define PSNIP_SAFE_HAVE_LARGER_SCHAR -#if PSNIP_SAFE_IS_LARGER(SCHAR_MAX, SHRT_MAX) -typedef short psnip_safe_schar_larger; -#elif PSNIP_SAFE_IS_LARGER(SCHAR_MAX, INT_MAX) -typedef int psnip_safe_schar_larger; -#elif PSNIP_SAFE_IS_LARGER(SCHAR_MAX, LONG_MAX) -typedef long psnip_safe_schar_larger; -#elif PSNIP_SAFE_IS_LARGER(SCHAR_MAX, LLONG_MAX) -typedef long long psnip_safe_schar_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SCHAR_MAX, 0x7fff) -typedef psnip_int16_t psnip_safe_schar_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SCHAR_MAX, 0x7fffffffLL) -typedef psnip_int32_t 
psnip_safe_schar_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SCHAR_MAX, 0x7fffffffffffffffLL) -typedef psnip_int64_t psnip_safe_schar_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (SCHAR_MAX <= 0x7fffffffffffffffLL) -typedef psnip_safe_int128_t psnip_safe_schar_larger; -#else -#undef PSNIP_SAFE_HAVE_LARGER_SCHAR -#endif - -#define PSNIP_SAFE_HAVE_LARGER_UCHAR -#if PSNIP_SAFE_IS_LARGER(UCHAR_MAX, USHRT_MAX) -typedef unsigned short psnip_safe_uchar_larger; -#elif PSNIP_SAFE_IS_LARGER(UCHAR_MAX, UINT_MAX) -typedef unsigned int psnip_safe_uchar_larger; -#elif PSNIP_SAFE_IS_LARGER(UCHAR_MAX, ULONG_MAX) -typedef unsigned long psnip_safe_uchar_larger; -#elif PSNIP_SAFE_IS_LARGER(UCHAR_MAX, ULLONG_MAX) -typedef unsigned long long psnip_safe_uchar_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UCHAR_MAX, 0xffffU) -typedef psnip_uint16_t psnip_safe_uchar_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UCHAR_MAX, 0xffffffffUL) -typedef psnip_uint32_t psnip_safe_uchar_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UCHAR_MAX, 0xffffffffffffffffULL) -typedef psnip_uint64_t psnip_safe_uchar_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (UCHAR_MAX <= 0xffffffffffffffffULL) -typedef psnip_safe_uint128_t psnip_safe_uchar_larger; -#else -#undef PSNIP_SAFE_HAVE_LARGER_UCHAR -#endif - -#if CHAR_MIN == 0 && defined(PSNIP_SAFE_HAVE_LARGER_UCHAR) -#define PSNIP_SAFE_HAVE_LARGER_CHAR -typedef psnip_safe_uchar_larger psnip_safe_char_larger; -#elif CHAR_MIN < 0 && defined(PSNIP_SAFE_HAVE_LARGER_SCHAR) -#define PSNIP_SAFE_HAVE_LARGER_CHAR -typedef psnip_safe_schar_larger psnip_safe_char_larger; -#endif - -#define PSNIP_SAFE_HAVE_LARGER_SHRT -#if PSNIP_SAFE_IS_LARGER(SHRT_MAX, INT_MAX) -typedef int psnip_safe_short_larger; -#elif PSNIP_SAFE_IS_LARGER(SHRT_MAX, LONG_MAX) -typedef long psnip_safe_short_larger; -#elif PSNIP_SAFE_IS_LARGER(SHRT_MAX, LLONG_MAX) -typedef long long psnip_safe_short_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SHRT_MAX, 0x7fff) -typedef psnip_int16_t psnip_safe_short_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SHRT_MAX, 0x7fffffffLL) -typedef psnip_int32_t psnip_safe_short_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(SHRT_MAX, 0x7fffffffffffffffLL) -typedef psnip_int64_t psnip_safe_short_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (SHRT_MAX <= 0x7fffffffffffffffLL) -typedef psnip_safe_int128_t psnip_safe_short_larger; -#else -#undef PSNIP_SAFE_HAVE_LARGER_SHRT -#endif - -#define PSNIP_SAFE_HAVE_LARGER_USHRT -#if PSNIP_SAFE_IS_LARGER(USHRT_MAX, UINT_MAX) -typedef unsigned int psnip_safe_ushort_larger; -#elif PSNIP_SAFE_IS_LARGER(USHRT_MAX, ULONG_MAX) -typedef unsigned long psnip_safe_ushort_larger; -#elif PSNIP_SAFE_IS_LARGER(USHRT_MAX, ULLONG_MAX) -typedef unsigned long long psnip_safe_ushort_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(USHRT_MAX, 0xffff) -typedef psnip_uint16_t psnip_safe_ushort_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(USHRT_MAX, 0xffffffffUL) -typedef psnip_uint32_t psnip_safe_ushort_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(USHRT_MAX, 0xffffffffffffffffULL) -typedef psnip_uint64_t psnip_safe_ushort_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (USHRT_MAX <= 0xffffffffffffffffULL) -typedef 
psnip_safe_uint128_t psnip_safe_ushort_larger; -#else -#undef PSNIP_SAFE_HAVE_LARGER_USHRT -#endif - -#define PSNIP_SAFE_HAVE_LARGER_INT -#if PSNIP_SAFE_IS_LARGER(INT_MAX, LONG_MAX) -typedef long psnip_safe_int_larger; -#elif PSNIP_SAFE_IS_LARGER(INT_MAX, LLONG_MAX) -typedef long long psnip_safe_int_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(INT_MAX, 0x7fff) -typedef psnip_int16_t psnip_safe_int_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(INT_MAX, 0x7fffffffLL) -typedef psnip_int32_t psnip_safe_int_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(INT_MAX, 0x7fffffffffffffffLL) -typedef psnip_int64_t psnip_safe_int_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (INT_MAX <= 0x7fffffffffffffffLL) -typedef psnip_safe_int128_t psnip_safe_int_larger; -#else -#undef PSNIP_SAFE_HAVE_LARGER_INT -#endif - -#define PSNIP_SAFE_HAVE_LARGER_UINT -#if PSNIP_SAFE_IS_LARGER(UINT_MAX, ULONG_MAX) -typedef unsigned long psnip_safe_uint_larger; -#elif PSNIP_SAFE_IS_LARGER(UINT_MAX, ULLONG_MAX) -typedef unsigned long long psnip_safe_uint_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UINT_MAX, 0xffff) -typedef psnip_uint16_t psnip_safe_uint_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UINT_MAX, 0xffffffffUL) -typedef psnip_uint32_t psnip_safe_uint_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(UINT_MAX, 0xffffffffffffffffULL) -typedef psnip_uint64_t psnip_safe_uint_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (UINT_MAX <= 0xffffffffffffffffULL) -typedef psnip_safe_uint128_t psnip_safe_uint_larger; -#else -#undef PSNIP_SAFE_HAVE_LARGER_UINT -#endif - -#define PSNIP_SAFE_HAVE_LARGER_LONG -#if PSNIP_SAFE_IS_LARGER(LONG_MAX, LLONG_MAX) -typedef long long psnip_safe_long_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LONG_MAX, 0x7fff) -typedef psnip_int16_t psnip_safe_long_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LONG_MAX, 0x7fffffffLL) -typedef psnip_int32_t psnip_safe_long_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LONG_MAX, 0x7fffffffffffffffLL) -typedef psnip_int64_t psnip_safe_long_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (LONG_MAX <= 0x7fffffffffffffffLL) -typedef psnip_safe_int128_t psnip_safe_long_larger; -#else -#undef PSNIP_SAFE_HAVE_LARGER_LONG -#endif - -#define PSNIP_SAFE_HAVE_LARGER_ULONG -#if PSNIP_SAFE_IS_LARGER(ULONG_MAX, ULLONG_MAX) -typedef unsigned long long psnip_safe_ulong_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULONG_MAX, 0xffff) -typedef psnip_uint16_t psnip_safe_ulong_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULONG_MAX, 0xffffffffUL) -typedef psnip_uint32_t psnip_safe_ulong_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULONG_MAX, 0xffffffffffffffffULL) -typedef psnip_uint64_t psnip_safe_ulong_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (ULONG_MAX <= 0xffffffffffffffffULL) -typedef psnip_safe_uint128_t psnip_safe_ulong_larger; -#else -#undef PSNIP_SAFE_HAVE_LARGER_ULONG -#endif - -#define PSNIP_SAFE_HAVE_LARGER_LLONG -#if !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LLONG_MAX, 0x7fff) -typedef psnip_int16_t psnip_safe_llong_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LLONG_MAX, 0x7fffffffLL) -typedef psnip_int32_t 
psnip_safe_llong_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(LLONG_MAX, 0x7fffffffffffffffLL) -typedef psnip_int64_t psnip_safe_llong_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (LLONG_MAX <= 0x7fffffffffffffffLL) -typedef psnip_safe_int128_t psnip_safe_llong_larger; -#else -#undef PSNIP_SAFE_HAVE_LARGER_LLONG -#endif - -#define PSNIP_SAFE_HAVE_LARGER_ULLONG -#if !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULLONG_MAX, 0xffff) -typedef psnip_uint16_t psnip_safe_ullong_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULLONG_MAX, 0xffffffffUL) -typedef psnip_uint32_t psnip_safe_ullong_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(ULLONG_MAX, 0xffffffffffffffffULL) -typedef psnip_uint64_t psnip_safe_ullong_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (ULLONG_MAX <= 0xffffffffffffffffULL) -typedef psnip_safe_uint128_t psnip_safe_ullong_larger; -#else -#undef PSNIP_SAFE_HAVE_LARGER_ULLONG -#endif - -#if defined(PSNIP_SAFE_SIZE_MAX) -#define PSNIP_SAFE_HAVE_LARGER_SIZE -#if PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, USHRT_MAX) -typedef unsigned short psnip_safe_size_larger; -#elif PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, UINT_MAX) -typedef unsigned int psnip_safe_size_larger; -#elif PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, ULONG_MAX) -typedef unsigned long psnip_safe_size_larger; -#elif PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, ULLONG_MAX) -typedef unsigned long long psnip_safe_size_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, 0xffff) -typedef psnip_uint16_t psnip_safe_size_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, 0xffffffffUL) -typedef psnip_uint32_t psnip_safe_size_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && PSNIP_SAFE_IS_LARGER(PSNIP_SAFE_SIZE_MAX, 0xffffffffffffffffULL) -typedef psnip_uint64_t psnip_safe_size_larger; -#elif !defined(PSNIP_SAFE_NO_FIXED) && defined(PSNIP_SAFE_HAVE_128) && (PSNIP_SAFE_SIZE_MAX <= 0xffffffffffffffffULL) -typedef psnip_safe_uint128_t psnip_safe_size_larger; -#else -#undef PSNIP_SAFE_HAVE_LARGER_SIZE -#endif -#endif - -#if defined(PSNIP_SAFE_HAVE_LARGER_SCHAR) -PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(signed char, schar) -#endif - -#if defined(PSNIP_SAFE_HAVE_LARGER_UCHAR) -PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned char, uchar) -#endif - -#if defined(PSNIP_SAFE_HAVE_LARGER_CHAR) -#if CHAR_MIN == 0 -PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(char, char) -#else -PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(char, char) -#endif -#endif - -#if defined(PSNIP_SAFE_HAVE_LARGER_SHORT) -PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(short, short) -#endif - -#if defined(PSNIP_SAFE_HAVE_LARGER_USHORT) -PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned short, ushort) -#endif - -#if defined(PSNIP_SAFE_HAVE_LARGER_INT) -PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(int, int) -#endif - -#if defined(PSNIP_SAFE_HAVE_LARGER_UINT) -PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned int, uint) -#endif - -#if defined(PSNIP_SAFE_HAVE_LARGER_LONG) -PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(long, long) -#endif - -#if defined(PSNIP_SAFE_HAVE_LARGER_ULONG) -PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned long, ulong) -#endif - -#if defined(PSNIP_SAFE_HAVE_LARGER_LLONG) -PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(long long, llong) -#endif - -#if defined(PSNIP_SAFE_HAVE_LARGER_ULLONG) -PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(unsigned long long, ullong) -#endif - -#if defined(PSNIP_SAFE_HAVE_LARGER_SIZE) 
-PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(size_t, size) -#endif - -#if !defined(PSNIP_SAFE_NO_FIXED) -PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(psnip_int8_t, int8) -PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(psnip_uint8_t, uint8) -PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(psnip_int16_t, int16) -PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(psnip_uint16_t, uint16) -PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(psnip_int32_t, int32) -PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(psnip_uint32_t, uint32) -#if defined(PSNIP_SAFE_HAVE_128) -PSNIP_SAFE_DEFINE_LARGER_SIGNED_OPS(psnip_int64_t, int64) -PSNIP_SAFE_DEFINE_LARGER_UNSIGNED_OPS(psnip_uint64_t, uint64) -#endif -#endif - -#endif /* !defined(PSNIP_SAFE_NO_PROMOTIONS) */ - -#define PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(T, name, op_name) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_##op_name(T* res, T a, T b) { \ - return !__builtin_##op_name##_overflow(a, b, res); \ - } - -#define PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(T, name, op_name, min, max) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_##op_name(T* res, T a, T b) { \ - const psnip_safe_##name##_larger r = psnip_safe_larger_##name##_##op_name(a, b); \ - *res = (T) r; \ - return (r >= min) && (r <= max); \ - } - -#define PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(T, name, op_name, max) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_##op_name(T* res, T a, T b) { \ - const psnip_safe_##name##_larger r = psnip_safe_larger_##name##_##op_name(a, b); \ - *res = (T) r; \ - return (r <= max); \ - } - -#define PSNIP_SAFE_DEFINE_SIGNED_ADD(T, name, min, max) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_add (T* res, T a, T b) { \ - psnip_safe_bool r = !( ((b > 0) && (a > (max - b))) || \ - ((b < 0) && (a < (min - b))) ); \ - if(PSNIP_SAFE_LIKELY(r)) \ - *res = a + b; \ - return r; \ - } - -#define PSNIP_SAFE_DEFINE_UNSIGNED_ADD(T, name, max) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_add (T* res, T a, T b) { \ - *res = (T) (a + b); \ - return !PSNIP_SAFE_UNLIKELY((b > 0) && (a > (max - b))); \ - } - -#define PSNIP_SAFE_DEFINE_SIGNED_SUB(T, name, min, max) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_sub (T* res, T a, T b) { \ - psnip_safe_bool r = !((b > 0 && a < (min + b)) || \ - (b < 0 && a > (max + b))); \ - if(PSNIP_SAFE_LIKELY(r)) \ - *res = a - b; \ - return r; \ - } - -#define PSNIP_SAFE_DEFINE_UNSIGNED_SUB(T, name, max) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_sub (T* res, T a, T b) { \ - *res = a - b; \ - return !PSNIP_SAFE_UNLIKELY(b > a); \ - } - -#define PSNIP_SAFE_DEFINE_SIGNED_MUL(T, name, min, max) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_mul (T* res, T a, T b) { \ - psnip_safe_bool r = 1; \ - if (a > 0) { \ - if (b > 0) { \ - if (a > (max / b)) { \ - r = 0; \ - } \ - } else { \ - if (b < (min / a)) { \ - r = 0; \ - } \ - } \ - } else { \ - if (b > 0) { \ - if (a < (min / b)) { \ - r = 0; \ - } \ - } else { \ - if ( (a != 0) && (b < (max / a))) { \ - r = 0; \ - } \ - } \ - } \ - if(PSNIP_SAFE_LIKELY(r)) \ - *res = a * b; \ - return r; \ - } - -#define PSNIP_SAFE_DEFINE_UNSIGNED_MUL(T, name, max) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_mul (T* res, T a, T b) { \ - *res = (T) (a * b); \ - return !PSNIP_SAFE_UNLIKELY((a > 0) && (b > 0) && (a > (max / b))); \ - } - -#define PSNIP_SAFE_DEFINE_SIGNED_DIV(T, name, min, max) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_div (T* res, T a, T b) { \ - if (PSNIP_SAFE_UNLIKELY(b 
== 0)) { \ - *res = 0; \ - return 0; \ - } else if (PSNIP_SAFE_UNLIKELY(a == min && b == -1)) { \ - *res = min; \ - return 0; \ - } else { \ - *res = (T) (a / b); \ - return 1; \ - } \ - } - -#define PSNIP_SAFE_DEFINE_UNSIGNED_DIV(T, name, max) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_div (T* res, T a, T b) { \ - if (PSNIP_SAFE_UNLIKELY(b == 0)) { \ - *res = 0; \ - return 0; \ - } else { \ - *res = a / b; \ - return 1; \ - } \ - } - -#define PSNIP_SAFE_DEFINE_SIGNED_MOD(T, name, min, max) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_mod (T* res, T a, T b) { \ - if (PSNIP_SAFE_UNLIKELY(b == 0)) { \ - *res = 0; \ - return 0; \ - } else if (PSNIP_SAFE_UNLIKELY(a == min && b == -1)) { \ - *res = min; \ - return 0; \ - } else { \ - *res = (T) (a % b); \ - return 1; \ - } \ - } - -#define PSNIP_SAFE_DEFINE_UNSIGNED_MOD(T, name, max) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_mod (T* res, T a, T b) { \ - if (PSNIP_SAFE_UNLIKELY(b == 0)) { \ - *res = 0; \ - return 0; \ - } else { \ - *res = a % b; \ - return 1; \ - } \ - } - -#define PSNIP_SAFE_DEFINE_SIGNED_NEG(T, name, min, max) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_neg (T* res, T value) { \ - psnip_safe_bool r = value != min; \ - *res = PSNIP_SAFE_LIKELY(r) ? -value : max; \ - return r; \ - } - -#define PSNIP_SAFE_DEFINE_INTSAFE(T, name, op, isf) \ - PSNIP_SAFE__FUNCTION psnip_safe_bool \ - psnip_safe_##name##_##op (T* res, T a, T b) { \ - return isf(a, b, res) == S_OK; \ - } - -#if CHAR_MIN == 0 -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, mul) -#elif defined(PSNIP_SAFE_HAVE_LARGER_CHAR) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(char, char, add, CHAR_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(char, char, sub, CHAR_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(char, char, mul, CHAR_MAX) -#else -PSNIP_SAFE_DEFINE_UNSIGNED_ADD(char, char, CHAR_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_SUB(char, char, CHAR_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_MUL(char, char, CHAR_MAX) -#endif -PSNIP_SAFE_DEFINE_UNSIGNED_DIV(char, char, CHAR_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_MOD(char, char, CHAR_MAX) -#else /* CHAR_MIN != 0 */ -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(char, char, mul) -#elif defined(PSNIP_SAFE_HAVE_LARGER_CHAR) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(char, char, add, CHAR_MIN, CHAR_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(char, char, sub, CHAR_MIN, CHAR_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(char, char, mul, CHAR_MIN, CHAR_MAX) -#else -PSNIP_SAFE_DEFINE_SIGNED_ADD(char, char, CHAR_MIN, CHAR_MAX) -PSNIP_SAFE_DEFINE_SIGNED_SUB(char, char, CHAR_MIN, CHAR_MAX) -PSNIP_SAFE_DEFINE_SIGNED_MUL(char, char, CHAR_MIN, CHAR_MAX) -#endif -PSNIP_SAFE_DEFINE_SIGNED_DIV(char, char, CHAR_MIN, CHAR_MAX) -PSNIP_SAFE_DEFINE_SIGNED_MOD(char, char, CHAR_MIN, CHAR_MAX) -PSNIP_SAFE_DEFINE_SIGNED_NEG(char, char, CHAR_MIN, CHAR_MAX) -#endif - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(signed char, schar, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(signed char, schar, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(signed char, schar, mul) -#elif defined(PSNIP_SAFE_HAVE_LARGER_SCHAR) 
-PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(signed char, schar, add, SCHAR_MIN, SCHAR_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(signed char, schar, sub, SCHAR_MIN, SCHAR_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(signed char, schar, mul, SCHAR_MIN, SCHAR_MAX) -#else -PSNIP_SAFE_DEFINE_SIGNED_ADD(signed char, schar, SCHAR_MIN, SCHAR_MAX) -PSNIP_SAFE_DEFINE_SIGNED_SUB(signed char, schar, SCHAR_MIN, SCHAR_MAX) -PSNIP_SAFE_DEFINE_SIGNED_MUL(signed char, schar, SCHAR_MIN, SCHAR_MAX) -#endif -PSNIP_SAFE_DEFINE_SIGNED_DIV(signed char, schar, SCHAR_MIN, SCHAR_MAX) -PSNIP_SAFE_DEFINE_SIGNED_MOD(signed char, schar, SCHAR_MIN, SCHAR_MAX) -PSNIP_SAFE_DEFINE_SIGNED_NEG(signed char, schar, SCHAR_MIN, SCHAR_MAX) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned char, uchar, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned char, uchar, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned char, uchar, mul) -#elif defined(PSNIP_SAFE_HAVE_LARGER_UCHAR) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned char, uchar, add, UCHAR_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned char, uchar, sub, UCHAR_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned char, uchar, mul, UCHAR_MAX) -#else -PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned char, uchar, UCHAR_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned char, uchar, UCHAR_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned char, uchar, UCHAR_MAX) -#endif -PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned char, uchar, UCHAR_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned char, uchar, UCHAR_MAX) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(short, short, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(short, short, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(short, short, mul) -#elif defined(PSNIP_SAFE_HAVE_LARGER_SHORT) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(short, short, add, SHRT_MIN, SHRT_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(short, short, sub, SHRT_MIN, SHRT_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(short, short, mul, SHRT_MIN, SHRT_MAX) -#else -PSNIP_SAFE_DEFINE_SIGNED_ADD(short, short, SHRT_MIN, SHRT_MAX) -PSNIP_SAFE_DEFINE_SIGNED_SUB(short, short, SHRT_MIN, SHRT_MAX) -PSNIP_SAFE_DEFINE_SIGNED_MUL(short, short, SHRT_MIN, SHRT_MAX) -#endif -PSNIP_SAFE_DEFINE_SIGNED_DIV(short, short, SHRT_MIN, SHRT_MAX) -PSNIP_SAFE_DEFINE_SIGNED_MOD(short, short, SHRT_MIN, SHRT_MAX) -PSNIP_SAFE_DEFINE_SIGNED_NEG(short, short, SHRT_MIN, SHRT_MAX) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned short, ushort, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned short, ushort, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned short, ushort, mul) -#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) -PSNIP_SAFE_DEFINE_INTSAFE(unsigned short, ushort, add, UShortAdd) -PSNIP_SAFE_DEFINE_INTSAFE(unsigned short, ushort, sub, UShortSub) -PSNIP_SAFE_DEFINE_INTSAFE(unsigned short, ushort, mul, UShortMult) -#elif defined(PSNIP_SAFE_HAVE_LARGER_USHORT) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned short, ushort, add, USHRT_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned short, ushort, sub, USHRT_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned short, ushort, mul, USHRT_MAX) -#else -PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned short, ushort, USHRT_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned short, ushort, USHRT_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned short, ushort, USHRT_MAX) -#endif 
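The promote/check/cast strategy described in the header comment near the top of this file is easier to see outside the macro machinery. The sketch below hand-expands what `PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP` generates, for a 16-bit add with a 32-bit intermediate. It is illustrative only; `checked_int16_add` is an invented name, not part of the vendored header.

```c
#include <stdint.h>
#include <stdio.h>

/* Hand-expanded promote/check/cast: widen both operands to a type that
 * cannot overflow for this operation, compute, then verify the result
 * still fits in the original type before the truncating cast back. */
static int checked_int16_add(int16_t* res, int16_t a, int16_t b) {
    int32_t r = (int32_t)a + (int32_t)b; /* int32_t holds any int16 sum */
    *res = (int16_t)r;                   /* cast back, as the macro does */
    return (r >= INT16_MIN) && (r <= INT16_MAX);
}

int main(void) {
    int16_t out;
    printf("%d\n", checked_int16_add(&out, 30000, 10000)); /* 0: would overflow */
    printf("%d\n", checked_int16_add(&out, 100, 200));     /* 1: out == 300 */
    return 0;
}
```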
-PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned short, ushort, USHRT_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned short, ushort, USHRT_MAX) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(int, int, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(int, int, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(int, int, mul) -#elif defined(PSNIP_SAFE_HAVE_LARGER_INT) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(int, int, add, INT_MIN, INT_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(int, int, sub, INT_MIN, INT_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(int, int, mul, INT_MIN, INT_MAX) -#else -PSNIP_SAFE_DEFINE_SIGNED_ADD(int, int, INT_MIN, INT_MAX) -PSNIP_SAFE_DEFINE_SIGNED_SUB(int, int, INT_MIN, INT_MAX) -PSNIP_SAFE_DEFINE_SIGNED_MUL(int, int, INT_MIN, INT_MAX) -#endif -PSNIP_SAFE_DEFINE_SIGNED_DIV(int, int, INT_MIN, INT_MAX) -PSNIP_SAFE_DEFINE_SIGNED_MOD(int, int, INT_MIN, INT_MAX) -PSNIP_SAFE_DEFINE_SIGNED_NEG(int, int, INT_MIN, INT_MAX) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned int, uint, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned int, uint, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned int, uint, mul) -#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) -PSNIP_SAFE_DEFINE_INTSAFE(unsigned int, uint, add, UIntAdd) -PSNIP_SAFE_DEFINE_INTSAFE(unsigned int, uint, sub, UIntSub) -PSNIP_SAFE_DEFINE_INTSAFE(unsigned int, uint, mul, UIntMult) -#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned int, uint, add, UINT_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned int, uint, sub, UINT_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned int, uint, mul, UINT_MAX) -#else -PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned int, uint, UINT_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned int, uint, UINT_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned int, uint, UINT_MAX) -#endif -PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned int, uint, UINT_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned int, uint, UINT_MAX) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long, long, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long, long, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long, long, mul) -#elif defined(PSNIP_SAFE_HAVE_LARGER_LONG) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long, long, add, LONG_MIN, LONG_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long, long, sub, LONG_MIN, LONG_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long, long, mul, LONG_MIN, LONG_MAX) -#else -PSNIP_SAFE_DEFINE_SIGNED_ADD(long, long, LONG_MIN, LONG_MAX) -PSNIP_SAFE_DEFINE_SIGNED_SUB(long, long, LONG_MIN, LONG_MAX) -PSNIP_SAFE_DEFINE_SIGNED_MUL(long, long, LONG_MIN, LONG_MAX) -#endif -PSNIP_SAFE_DEFINE_SIGNED_DIV(long, long, LONG_MIN, LONG_MAX) -PSNIP_SAFE_DEFINE_SIGNED_MOD(long, long, LONG_MIN, LONG_MAX) -PSNIP_SAFE_DEFINE_SIGNED_NEG(long, long, LONG_MIN, LONG_MAX) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long, ulong, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long, ulong, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long, ulong, mul) -#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) -PSNIP_SAFE_DEFINE_INTSAFE(unsigned long, ulong, add, ULongAdd) -PSNIP_SAFE_DEFINE_INTSAFE(unsigned long, ulong, sub, ULongSub) -PSNIP_SAFE_DEFINE_INTSAFE(unsigned long, ulong, mul, ULongMult) -#elif defined(PSNIP_SAFE_HAVE_LARGER_ULONG) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long, ulong, add, ULONG_MAX) 
-PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long, ulong, sub, ULONG_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long, ulong, mul, ULONG_MAX) -#else -PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned long, ulong, ULONG_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned long, ulong, ULONG_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned long, ulong, ULONG_MAX) -#endif -PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned long, ulong, ULONG_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned long, ulong, ULONG_MAX) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long long, llong, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long long, llong, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(long long, llong, mul) -#elif defined(PSNIP_SAFE_HAVE_LARGER_LLONG) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long long, llong, add, LLONG_MIN, LLONG_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long long, llong, sub, LLONG_MIN, LLONG_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(long long, llong, mul, LLONG_MIN, LLONG_MAX) -#else -PSNIP_SAFE_DEFINE_SIGNED_ADD(long long, llong, LLONG_MIN, LLONG_MAX) -PSNIP_SAFE_DEFINE_SIGNED_SUB(long long, llong, LLONG_MIN, LLONG_MAX) -PSNIP_SAFE_DEFINE_SIGNED_MUL(long long, llong, LLONG_MIN, LLONG_MAX) -#endif -PSNIP_SAFE_DEFINE_SIGNED_DIV(long long, llong, LLONG_MIN, LLONG_MAX) -PSNIP_SAFE_DEFINE_SIGNED_MOD(long long, llong, LLONG_MIN, LLONG_MAX) -PSNIP_SAFE_DEFINE_SIGNED_NEG(long long, llong, LLONG_MIN, LLONG_MAX) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long long, ullong, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long long, ullong, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(unsigned long long, ullong, mul) -#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) -PSNIP_SAFE_DEFINE_INTSAFE(unsigned long long, ullong, add, ULongLongAdd) -PSNIP_SAFE_DEFINE_INTSAFE(unsigned long long, ullong, sub, ULongLongSub) -PSNIP_SAFE_DEFINE_INTSAFE(unsigned long long, ullong, mul, ULongLongMult) -#elif defined(PSNIP_SAFE_HAVE_LARGER_ULLONG) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long long, ullong, add, ULLONG_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long long, ullong, sub, ULLONG_MAX) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(unsigned long long, ullong, mul, ULLONG_MAX) -#else -PSNIP_SAFE_DEFINE_UNSIGNED_ADD(unsigned long long, ullong, ULLONG_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_SUB(unsigned long long, ullong, ULLONG_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_MUL(unsigned long long, ullong, ULLONG_MAX) -#endif -PSNIP_SAFE_DEFINE_UNSIGNED_DIV(unsigned long long, ullong, ULLONG_MAX) -PSNIP_SAFE_DEFINE_UNSIGNED_MOD(unsigned long long, ullong, ULLONG_MAX) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(size_t, size, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(size_t, size, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(size_t, size, mul) -#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) -PSNIP_SAFE_DEFINE_INTSAFE(size_t, size, add, SizeTAdd) -PSNIP_SAFE_DEFINE_INTSAFE(size_t, size, sub, SizeTSub) -PSNIP_SAFE_DEFINE_INTSAFE(size_t, size, mul, SizeTMult) -#elif defined(PSNIP_SAFE_HAVE_LARGER_SIZE) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(size_t, size, add, PSNIP_SAFE__SIZE_MAX_RT) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(size_t, size, sub, PSNIP_SAFE__SIZE_MAX_RT) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(size_t, size, mul, PSNIP_SAFE__SIZE_MAX_RT) -#else -PSNIP_SAFE_DEFINE_UNSIGNED_ADD(size_t, size, PSNIP_SAFE__SIZE_MAX_RT) 
-PSNIP_SAFE_DEFINE_UNSIGNED_SUB(size_t, size, PSNIP_SAFE__SIZE_MAX_RT) -PSNIP_SAFE_DEFINE_UNSIGNED_MUL(size_t, size, PSNIP_SAFE__SIZE_MAX_RT) -#endif -PSNIP_SAFE_DEFINE_UNSIGNED_DIV(size_t, size, PSNIP_SAFE__SIZE_MAX_RT) -PSNIP_SAFE_DEFINE_UNSIGNED_MOD(size_t, size, PSNIP_SAFE__SIZE_MAX_RT) - -#if !defined(PSNIP_SAFE_NO_FIXED) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int8_t, int8, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int8_t, int8, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int8_t, int8, mul) -#elif defined(PSNIP_SAFE_HAVE_LARGER_INT8) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int8_t, int8, add, (-0x7fLL-1), 0x7f) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int8_t, int8, sub, (-0x7fLL-1), 0x7f) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int8_t, int8, mul, (-0x7fLL-1), 0x7f) -#else -PSNIP_SAFE_DEFINE_SIGNED_ADD(psnip_int8_t, int8, (-0x7fLL-1), 0x7f) -PSNIP_SAFE_DEFINE_SIGNED_SUB(psnip_int8_t, int8, (-0x7fLL-1), 0x7f) -PSNIP_SAFE_DEFINE_SIGNED_MUL(psnip_int8_t, int8, (-0x7fLL-1), 0x7f) -#endif -PSNIP_SAFE_DEFINE_SIGNED_DIV(psnip_int8_t, int8, (-0x7fLL-1), 0x7f) -PSNIP_SAFE_DEFINE_SIGNED_MOD(psnip_int8_t, int8, (-0x7fLL-1), 0x7f) -PSNIP_SAFE_DEFINE_SIGNED_NEG(psnip_int8_t, int8, (-0x7fLL-1), 0x7f) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint8_t, uint8, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint8_t, uint8, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint8_t, uint8, mul) -#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT8) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint8_t, uint8, add, 0xff) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint8_t, uint8, sub, 0xff) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint8_t, uint8, mul, 0xff) -#else -PSNIP_SAFE_DEFINE_UNSIGNED_ADD(psnip_uint8_t, uint8, 0xff) -PSNIP_SAFE_DEFINE_UNSIGNED_SUB(psnip_uint8_t, uint8, 0xff) -PSNIP_SAFE_DEFINE_UNSIGNED_MUL(psnip_uint8_t, uint8, 0xff) -#endif -PSNIP_SAFE_DEFINE_UNSIGNED_DIV(psnip_uint8_t, uint8, 0xff) -PSNIP_SAFE_DEFINE_UNSIGNED_MOD(psnip_uint8_t, uint8, 0xff) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int16_t, int16, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int16_t, int16, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int16_t, int16, mul) -#elif defined(PSNIP_SAFE_HAVE_LARGER_INT16) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int16_t, int16, add, (-32767-1), 0x7fff) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int16_t, int16, sub, (-32767-1), 0x7fff) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int16_t, int16, mul, (-32767-1), 0x7fff) -#else -PSNIP_SAFE_DEFINE_SIGNED_ADD(psnip_int16_t, int16, (-32767-1), 0x7fff) -PSNIP_SAFE_DEFINE_SIGNED_SUB(psnip_int16_t, int16, (-32767-1), 0x7fff) -PSNIP_SAFE_DEFINE_SIGNED_MUL(psnip_int16_t, int16, (-32767-1), 0x7fff) -#endif -PSNIP_SAFE_DEFINE_SIGNED_DIV(psnip_int16_t, int16, (-32767-1), 0x7fff) -PSNIP_SAFE_DEFINE_SIGNED_MOD(psnip_int16_t, int16, (-32767-1), 0x7fff) -PSNIP_SAFE_DEFINE_SIGNED_NEG(psnip_int16_t, int16, (-32767-1), 0x7fff) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint16_t, uint16, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint16_t, uint16, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint16_t, uint16, mul) -#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) && defined(_WIN32) -PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint16_t, uint16, add, UShortAdd) 
-PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint16_t, uint16, sub, UShortSub) -PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint16_t, uint16, mul, UShortMult) -#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT16) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint16_t, uint16, add, 0xffff) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint16_t, uint16, sub, 0xffff) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint16_t, uint16, mul, 0xffff) -#else -PSNIP_SAFE_DEFINE_UNSIGNED_ADD(psnip_uint16_t, uint16, 0xffff) -PSNIP_SAFE_DEFINE_UNSIGNED_SUB(psnip_uint16_t, uint16, 0xffff) -PSNIP_SAFE_DEFINE_UNSIGNED_MUL(psnip_uint16_t, uint16, 0xffff) -#endif -PSNIP_SAFE_DEFINE_UNSIGNED_DIV(psnip_uint16_t, uint16, 0xffff) -PSNIP_SAFE_DEFINE_UNSIGNED_MOD(psnip_uint16_t, uint16, 0xffff) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int32_t, int32, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int32_t, int32, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int32_t, int32, mul) -#elif defined(PSNIP_SAFE_HAVE_LARGER_INT32) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int32_t, int32, add, (-0x7fffffffLL-1), 0x7fffffffLL) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int32_t, int32, sub, (-0x7fffffffLL-1), 0x7fffffffLL) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int32_t, int32, mul, (-0x7fffffffLL-1), 0x7fffffffLL) -#else -PSNIP_SAFE_DEFINE_SIGNED_ADD(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL) -PSNIP_SAFE_DEFINE_SIGNED_SUB(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL) -PSNIP_SAFE_DEFINE_SIGNED_MUL(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL) -#endif -PSNIP_SAFE_DEFINE_SIGNED_DIV(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL) -PSNIP_SAFE_DEFINE_SIGNED_MOD(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL) -PSNIP_SAFE_DEFINE_SIGNED_NEG(psnip_int32_t, int32, (-0x7fffffffLL-1), 0x7fffffffLL) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint32_t, uint32, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint32_t, uint32, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint32_t, uint32, mul) -#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) && defined(_WIN32) -PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint32_t, uint32, add, UIntAdd) -PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint32_t, uint32, sub, UIntSub) -PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint32_t, uint32, mul, UIntMult) -#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT32) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint32_t, uint32, add, 0xffffffffUL) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint32_t, uint32, sub, 0xffffffffUL) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint32_t, uint32, mul, 0xffffffffUL) -#else -PSNIP_SAFE_DEFINE_UNSIGNED_ADD(psnip_uint32_t, uint32, 0xffffffffUL) -PSNIP_SAFE_DEFINE_UNSIGNED_SUB(psnip_uint32_t, uint32, 0xffffffffUL) -PSNIP_SAFE_DEFINE_UNSIGNED_MUL(psnip_uint32_t, uint32, 0xffffffffUL) -#endif -PSNIP_SAFE_DEFINE_UNSIGNED_DIV(psnip_uint32_t, uint32, 0xffffffffUL) -PSNIP_SAFE_DEFINE_UNSIGNED_MOD(psnip_uint32_t, uint32, 0xffffffffUL) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int64_t, int64, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int64_t, int64, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_int64_t, int64, mul) -#elif defined(PSNIP_SAFE_HAVE_LARGER_INT64) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int64_t, int64, add, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int64_t, int64, 
sub, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL) -PSNIP_SAFE_DEFINE_PROMOTED_SIGNED_BINARY_OP(psnip_int64_t, int64, mul, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL) -#else -PSNIP_SAFE_DEFINE_SIGNED_ADD(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL) -PSNIP_SAFE_DEFINE_SIGNED_SUB(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL) -PSNIP_SAFE_DEFINE_SIGNED_MUL(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL) -#endif -PSNIP_SAFE_DEFINE_SIGNED_DIV(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL) -PSNIP_SAFE_DEFINE_SIGNED_MOD(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL) -PSNIP_SAFE_DEFINE_SIGNED_NEG(psnip_int64_t, int64, (-0x7fffffffffffffffLL-1), 0x7fffffffffffffffLL) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint64_t, uint64, add) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint64_t, uint64, sub) -PSNIP_SAFE_DEFINE_BUILTIN_BINARY_OP(psnip_uint64_t, uint64, mul) -#elif defined(PSNIP_SAFE_HAVE_INTSAFE_H) && defined(_WIN32) -PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint64_t, uint64, add, ULongLongAdd) -PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint64_t, uint64, sub, ULongLongSub) -PSNIP_SAFE_DEFINE_INTSAFE(psnip_uint64_t, uint64, mul, ULongLongMult) -#elif defined(PSNIP_SAFE_HAVE_LARGER_UINT64) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint64_t, uint64, add, 0xffffffffffffffffULL) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint64_t, uint64, sub, 0xffffffffffffffffULL) -PSNIP_SAFE_DEFINE_PROMOTED_UNSIGNED_BINARY_OP(psnip_uint64_t, uint64, mul, 0xffffffffffffffffULL) -#else -PSNIP_SAFE_DEFINE_UNSIGNED_ADD(psnip_uint64_t, uint64, 0xffffffffffffffffULL) -PSNIP_SAFE_DEFINE_UNSIGNED_SUB(psnip_uint64_t, uint64, 0xffffffffffffffffULL) -PSNIP_SAFE_DEFINE_UNSIGNED_MUL(psnip_uint64_t, uint64, 0xffffffffffffffffULL) -#endif -PSNIP_SAFE_DEFINE_UNSIGNED_DIV(psnip_uint64_t, uint64, 0xffffffffffffffffULL) -PSNIP_SAFE_DEFINE_UNSIGNED_MOD(psnip_uint64_t, uint64, 0xffffffffffffffffULL) - -#endif /* !defined(PSNIP_SAFE_NO_FIXED) */ - -#define PSNIP_SAFE_C11_GENERIC_SELECTION(res, op) \ - _Generic((*res), \ - char: psnip_safe_char_##op, \ - unsigned char: psnip_safe_uchar_##op, \ - short: psnip_safe_short_##op, \ - unsigned short: psnip_safe_ushort_##op, \ - int: psnip_safe_int_##op, \ - unsigned int: psnip_safe_uint_##op, \ - long: psnip_safe_long_##op, \ - unsigned long: psnip_safe_ulong_##op, \ - long long: psnip_safe_llong_##op, \ - unsigned long long: psnip_safe_ullong_##op) - -#define PSNIP_SAFE_C11_GENERIC_BINARY_OP(op, res, a, b) \ - PSNIP_SAFE_C11_GENERIC_SELECTION(res, op)(res, a, b) -#define PSNIP_SAFE_C11_GENERIC_UNARY_OP(op, res, v) \ - PSNIP_SAFE_C11_GENERIC_SELECTION(res, op)(res, v) - -#if defined(PSNIP_SAFE_HAVE_BUILTIN_OVERFLOW) -#define psnip_safe_add(res, a, b) !__builtin_add_overflow(a, b, res) -#define psnip_safe_sub(res, a, b) !__builtin_sub_overflow(a, b, res) -#define psnip_safe_mul(res, a, b) !__builtin_mul_overflow(a, b, res) -#define psnip_safe_div(res, a, b) !__builtin_div_overflow(a, b, res) -#define psnip_safe_mod(res, a, b) !__builtin_mod_overflow(a, b, res) -#define psnip_safe_neg(res, v) PSNIP_SAFE_C11_GENERIC_UNARY_OP (neg, res, v) - -#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) -/* There are no fixed-length or size selections because they cause an - * error about _Generic specifying two compatible types. 
Hopefully - * this doesn't cause problems on exotic platforms, but if it does - * please let me know and I'll try to figure something out. */ - -#define psnip_safe_add(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(add, res, a, b) -#define psnip_safe_sub(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(sub, res, a, b) -#define psnip_safe_mul(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(mul, res, a, b) -#define psnip_safe_div(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(div, res, a, b) -#define psnip_safe_mod(res, a, b) PSNIP_SAFE_C11_GENERIC_BINARY_OP(mod, res, a, b) -#define psnip_safe_neg(res, v) PSNIP_SAFE_C11_GENERIC_UNARY_OP (neg, res, v) -#endif - -#if !defined(PSNIP_SAFE_HAVE_BUILTINS) && (defined(PSNIP_SAFE_EMULATE_NATIVE) || defined(PSNIP_BUILTIN_EMULATE_NATIVE)) -# define __builtin_sadd_overflow(a, b, res) (!psnip_safe_int_add(res, a, b)) -# define __builtin_saddl_overflow(a, b, res) (!psnip_safe_long_add(res, a, b)) -# define __builtin_saddll_overflow(a, b, res) (!psnip_safe_llong_add(res, a, b)) -# define __builtin_uadd_overflow(a, b, res) (!psnip_safe_uint_add(res, a, b)) -# define __builtin_uaddl_overflow(a, b, res) (!psnip_safe_ulong_add(res, a, b)) -# define __builtin_uaddll_overflow(a, b, res) (!psnip_safe_ullong_add(res, a, b)) - -# define __builtin_ssub_overflow(a, b, res) (!psnip_safe_int_sub(res, a, b)) -# define __builtin_ssubl_overflow(a, b, res) (!psnip_safe_long_sub(res, a, b)) -# define __builtin_ssubll_overflow(a, b, res) (!psnip_safe_llong_sub(res, a, b)) -# define __builtin_usub_overflow(a, b, res) (!psnip_safe_uint_sub(res, a, b)) -# define __builtin_usubl_overflow(a, b, res) (!psnip_safe_ulong_sub(res, a, b)) -# define __builtin_usubll_overflow(a, b, res) (!psnip_safe_ullong_sub(res, a, b)) - -# define __builtin_smul_overflow(a, b, res) (!psnip_safe_int_mul(res, a, b)) -# define __builtin_smull_overflow(a, b, res) (!psnip_safe_long_mul(res, a, b)) -# define __builtin_smulll_overflow(a, b, res) (!psnip_safe_llong_mul(res, a, b)) -# define __builtin_umul_overflow(a, b, res) (!psnip_safe_uint_mul(res, a, b)) -# define __builtin_umull_overflow(a, b, res) (!psnip_safe_ulong_mul(res, a, b)) -# define __builtin_umulll_overflow(a, b, res) (!psnip_safe_ullong_mul(res, a, b)) -#endif - -#endif /* !defined(PSNIP_SAFE_H) */ diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/vendored/xxhash.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/vendored/xxhash.h deleted file mode 100644 index a33cdf86..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/vendored/xxhash.h +++ /dev/null @@ -1,18 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
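Taken together, the deleted safe.h exposes two call styles: generic front-ends (`psnip_safe_add`, `psnip_safe_mul`, ...) that dispatch on the type of `*res` via `__builtin_*_overflow` or C11 `_Generic`, and per-type functions such as `psnip_safe_int_div` that are always defined. A hedged usage sketch follows; it assumes a C11 compiler or GCC-style overflow builtins and that the vendored header is reachable as `"safe.h"` (an assumed include path), with illustrative values throughout.

```c
#include <limits.h>
#include <stdio.h>
#include "safe.h" /* assumed path to the vendored header shown above */

int main(void) {
    int sum;
    /* Generic front-end: dispatches on the type of *res; a zero return
     * means the operation would have overflowed. */
    if (!psnip_safe_add(&sum, INT_MAX, 1))
        puts("INT_MAX + 1 rejected");

    int quot;
    /* Per-type function: signed division traps both undefined cases,
     * b == 0 and INT_MIN / -1. */
    if (!psnip_safe_int_div(&quot, INT_MIN, -1))
        puts("INT_MIN / -1 rejected");

    unsigned long prod;
    if (psnip_safe_mul(&prod, 123456789UL, 10UL))
        printf("prod = %lu\n", prod); /* fits: reports success */
    return 0;
}
```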
- -#include "arrow/vendored/xxhash/xxhash.h" diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/vendored/xxhash/xxhash.h b/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/vendored/xxhash/xxhash.h deleted file mode 100644 index a18e8c76..00000000 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/vendored/xxhash/xxhash.h +++ /dev/null @@ -1,6773 +0,0 @@ -/* - * xxHash - Extremely Fast Hash algorithm - * Header File - * Copyright (C) 2012-2021 Yann Collet - * - * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You can contact the author at: - * - xxHash homepage: https://www.xxhash.com - * - xxHash source repository: https://github.com/Cyan4973/xxHash - */ - -/*! - * @mainpage xxHash - * - * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed - * limits. - * - * It is proposed in four flavors, in three families: - * 1. @ref XXH32_family - * - Classic 32-bit hash function. Simple, compact, and runs on almost all - * 32-bit and 64-bit systems. - * 2. @ref XXH64_family - * - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most - * 64-bit systems (but _not_ 32-bit systems). - * 3. @ref XXH3_family - * - Modern 64-bit and 128-bit hash function family which features improved - * strength and performance across the board, especially on smaller data. - * It benefits greatly from SIMD and 64-bit without requiring it. - * - * Benchmarks - * --- - * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04. - * The open source benchmark program is compiled with clang v10.0 using -O3 flag. 
- * - * | Hash Name | ISA ext | Width | Large Data Speed | Small Data Velocity | - * | -------------------- | ------- | ----: | ---------------: | ------------------: | - * | XXH3_64bits() | @b AVX2 | 64 | 59.4 GB/s | 133.1 | - * | MeowHash | AES-NI | 128 | 58.2 GB/s | 52.5 | - * | XXH3_128bits() | @b AVX2 | 128 | 57.9 GB/s | 118.1 | - * | CLHash | PCLMUL | 64 | 37.1 GB/s | 58.1 | - * | XXH3_64bits() | @b SSE2 | 64 | 31.5 GB/s | 133.1 | - * | XXH3_128bits() | @b SSE2 | 128 | 29.6 GB/s | 118.1 | - * | RAM sequential read | | N/A | 28.0 GB/s | N/A | - * | ahash | AES-NI | 64 | 22.5 GB/s | 107.2 | - * | City64 | | 64 | 22.0 GB/s | 76.6 | - * | T1ha2 | | 64 | 22.0 GB/s | 99.0 | - * | City128 | | 128 | 21.7 GB/s | 57.7 | - * | FarmHash | AES-NI | 64 | 21.3 GB/s | 71.9 | - * | XXH64() | | 64 | 19.4 GB/s | 71.0 | - * | SpookyHash | | 64 | 19.3 GB/s | 53.2 | - * | Mum | | 64 | 18.0 GB/s | 67.0 | - * | CRC32C | SSE4.2 | 32 | 13.0 GB/s | 57.9 | - * | XXH32() | | 32 | 9.7 GB/s | 71.9 | - * | City32 | | 32 | 9.1 GB/s | 66.0 | - * | Blake3* | @b AVX2 | 256 | 4.4 GB/s | 8.1 | - * | Murmur3 | | 32 | 3.9 GB/s | 56.1 | - * | SipHash* | | 64 | 3.0 GB/s | 43.2 | - * | Blake3* | @b SSE2 | 256 | 2.4 GB/s | 8.1 | - * | HighwayHash | | 64 | 1.4 GB/s | 6.0 | - * | FNV64 | | 64 | 1.2 GB/s | 62.7 | - * | Blake2* | | 256 | 1.1 GB/s | 5.1 | - * | SHA1* | | 160 | 0.8 GB/s | 5.6 | - * | MD5* | | 128 | 0.6 GB/s | 7.8 | - * @note - * - Hashes which require a specific ISA extension are noted. SSE2 is also noted, - * even though it is mandatory on x64. - * - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic - * by modern standards. - * - Small data velocity is a rough average of algorithm's efficiency for small - * data. For more accurate information, see the wiki. - * - More benchmarks and strength tests are found on the wiki: - * https://github.com/Cyan4973/xxHash/wiki - * - * Usage - * ------ - * All xxHash variants use a similar API. Changing the algorithm is a trivial - * substitution. - * - * @pre - * For functions which take an input and length parameter, the following - * requirements are assumed: - * - The range from [`input`, `input + length`) is valid, readable memory. - * - The only exception is if the `length` is `0`, `input` may be `NULL`. - * - For C++, the objects must have the *TriviallyCopyable* property, as the - * functions access bytes directly as if it was an array of `unsigned char`. - * - * @anchor single_shot_example - * **Single Shot** - * - * These functions are stateless functions which hash a contiguous block of memory, - * immediately returning the result. They are the easiest and usually the fastest - * option. - * - * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits() - * - * @code{.c} - * #include <string.h> - * #include "xxhash.h" - * - * // Example for a function which hashes a null terminated string with XXH32(). - * XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed) - * { - * // NULL pointers are only valid if the length is zero - * size_t length = (string == NULL) ? 0 : strlen(string); - * return XXH32(string, length, seed); - * } - * @endcode - * - * @anchor streaming_example - * **Streaming** - * - * These groups of functions allow incremental hashing of unknown size, even - * more than what would fit in a size_t. - * - * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset() - * - * @code{.c} - * #include <stdio.h> - * #include <assert.h> - * #include "xxhash.h" - * // Example for a function which hashes a FILE incrementally with XXH3_64bits(). 
- * XXH64_hash_t hashFile(FILE* f) - * { - * // Allocate a state struct. Do not just use malloc() or new. - * XXH3_state_t* state = XXH3_createState(); - * assert(state != NULL && "Out of memory!"); - * // Reset the state to start a new hashing session. - * XXH3_64bits_reset(state); - * char buffer[4096]; - * size_t count; - * // Read the file in chunks - * while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) { - * // Run update() as many times as necessary to process the data - * XXH3_64bits_update(state, buffer, count); - * } - * // Retrieve the finalized hash. This will not change the state. - * XXH64_hash_t result = XXH3_64bits_digest(state); - * // Free the state. Do not use free(). - * XXH3_freeState(state); - * return result; - * } - * @endcode - * - * @file xxhash.h - * xxHash prototypes and implementation - */ - -#if defined (__cplusplus) -extern "C" { -#endif - -/* **************************** - * INLINE mode - ******************************/ -/*! - * @defgroup public Public API - * Contains details on the public xxHash functions. - * @{ - */ -#ifdef XXH_DOXYGEN -/*! - * @brief Gives access to internal state declaration, required for static allocation. - * - * Incompatible with dynamic linking, due to risks of ABI changes. - * - * Usage: - * @code{.c} - * #define XXH_STATIC_LINKING_ONLY - * #include "xxhash.h" - * @endcode - */ -# define XXH_STATIC_LINKING_ONLY -/* Do not undef XXH_STATIC_LINKING_ONLY for Doxygen */ - -/*! - * @brief Gives access to internal definitions. - * - * Usage: - * @code{.c} - * #define XXH_STATIC_LINKING_ONLY - * #define XXH_IMPLEMENTATION - * #include "xxhash.h" - * @endcode - */ -# define XXH_IMPLEMENTATION -/* Do not undef XXH_IMPLEMENTATION for Doxygen */ - -/*! - * @brief Exposes the implementation and marks all functions as `inline`. - * - * Use these build macros to inline xxhash into the target unit. - * Inlining improves performance on small inputs, especially when the length is - * expressed as a compile-time constant: - * - * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html - * - * It also keeps xxHash symbols private to the unit, so they are not exported. - * - * Usage: - * @code{.c} - * #define XXH_INLINE_ALL - * #include "xxhash.h" - * @endcode - * Do not compile and link xxhash.o as a separate object, as it is not useful. - */ -# define XXH_INLINE_ALL -# undef XXH_INLINE_ALL -/*! - * @brief Exposes the implementation without marking functions as inline. - */ -# define XXH_PRIVATE_API -# undef XXH_PRIVATE_API -/*! - * @brief Emulate a namespace by transparently prefixing all symbols. - * - * If you want to include _and expose_ xxHash functions from within your own - * library, but also want to avoid symbol collisions with other libraries which - * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix - * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE - * (therefore, avoid empty or numeric values). - * - * Note that no change is required within the calling program as long as it - * includes `xxhash.h`: Regular symbol names will be automatically translated - * by this header. 
- */ -# define XXH_NAMESPACE /* YOUR NAME HERE */ -# undef XXH_NAMESPACE -#endif - -#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \ - && !defined(XXH_INLINE_ALL_31684351384) - /* this section should be traversed only once */ -# define XXH_INLINE_ALL_31684351384 - /* give access to the advanced API, required to compile implementations */ -# undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */ -# define XXH_STATIC_LINKING_ONLY - /* make all functions private */ -# undef XXH_PUBLIC_API -# if defined(__GNUC__) -# define XXH_PUBLIC_API static __inline __attribute__((unused)) -# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define XXH_PUBLIC_API static inline -# elif defined(_MSC_VER) -# define XXH_PUBLIC_API static __inline -# else - /* note: this version may generate warnings for unused static functions */ -# define XXH_PUBLIC_API static -# endif - - /* - * This part deals with the special case where a unit wants to inline xxHash, - * but "xxhash.h" has previously been included without XXH_INLINE_ALL, - * such as part of some previously included *.h header file. - * Without further action, the new include would just be ignored, - * and functions would effectively _not_ be inlined (silent failure). - * The following macros solve this situation by prefixing all inlined names, - * avoiding naming collision with previous inclusions. - */ - /* Before that, we unconditionally #undef all symbols, - * in case they were already defined with XXH_NAMESPACE. - * They will then be redefined for XXH_INLINE_ALL - */ -# undef XXH_versionNumber - /* XXH32 */ -# undef XXH32 -# undef XXH32_createState -# undef XXH32_freeState -# undef XXH32_reset -# undef XXH32_update -# undef XXH32_digest -# undef XXH32_copyState -# undef XXH32_canonicalFromHash -# undef XXH32_hashFromCanonical - /* XXH64 */ -# undef XXH64 -# undef XXH64_createState -# undef XXH64_freeState -# undef XXH64_reset -# undef XXH64_update -# undef XXH64_digest -# undef XXH64_copyState -# undef XXH64_canonicalFromHash -# undef XXH64_hashFromCanonical - /* XXH3_64bits */ -# undef XXH3_64bits -# undef XXH3_64bits_withSecret -# undef XXH3_64bits_withSeed -# undef XXH3_64bits_withSecretandSeed -# undef XXH3_createState -# undef XXH3_freeState -# undef XXH3_copyState -# undef XXH3_64bits_reset -# undef XXH3_64bits_reset_withSeed -# undef XXH3_64bits_reset_withSecret -# undef XXH3_64bits_update -# undef XXH3_64bits_digest -# undef XXH3_generateSecret - /* XXH3_128bits */ -# undef XXH128 -# undef XXH3_128bits -# undef XXH3_128bits_withSeed -# undef XXH3_128bits_withSecret -# undef XXH3_128bits_reset -# undef XXH3_128bits_reset_withSeed -# undef XXH3_128bits_reset_withSecret -# undef XXH3_128bits_reset_withSecretandSeed -# undef XXH3_128bits_update -# undef XXH3_128bits_digest -# undef XXH128_isEqual -# undef XXH128_cmp -# undef XXH128_canonicalFromHash -# undef XXH128_hashFromCanonical - /* Finally, free the namespace itself */ -# undef XXH_NAMESPACE - - /* employ the namespace for XXH_INLINE_ALL */ -# define XXH_NAMESPACE XXH_INLINE_ - /* - * Some identifiers (enums, type names) are not symbols, - * but they must nonetheless be renamed to avoid redeclaration. - * Alternative solution: do not redeclare them. - * However, this requires some #ifdefs, and has a more dispersed impact. - * Meanwhile, renaming can be achieved in a single place. 
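As a concrete illustration of the `XXH_NAMESPACE` mechanism just described: a client library defines the prefix before including the header, and the macro layer transparently renames every public symbol. The `mylib_` prefix below is invented for the example; for the rename to link, the xxHash implementation must be compiled with the same prefix.

```c
/* Build every unit of the embedding library with the same prefix so the
 * exported symbol becomes mylib_XXH64 instead of XXH64, avoiding
 * collisions when another library also embeds xxHash. */
#define XXH_NAMESPACE mylib_ /* illustrative prefix, not from this diff */
#include "xxhash.h"
#include <stdio.h>

int main(void) {
    const char msg[] = "namespace demo";
    /* Callers keep writing XXH64(); the header translates the name, so
     * no source changes are needed in calling code. */
    XXH64_hash_t h = XXH64(msg, sizeof msg - 1, 0);
    printf("%016llx\n", (unsigned long long)h);
    return 0;
}
```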
- */ -# define XXH_IPREF(Id) XXH_NAMESPACE ## Id -# define XXH_OK XXH_IPREF(XXH_OK) -# define XXH_ERROR XXH_IPREF(XXH_ERROR) -# define XXH_errorcode XXH_IPREF(XXH_errorcode) -# define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t) -# define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t) -# define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t) -# define XXH32_state_s XXH_IPREF(XXH32_state_s) -# define XXH32_state_t XXH_IPREF(XXH32_state_t) -# define XXH64_state_s XXH_IPREF(XXH64_state_s) -# define XXH64_state_t XXH_IPREF(XXH64_state_t) -# define XXH3_state_s XXH_IPREF(XXH3_state_s) -# define XXH3_state_t XXH_IPREF(XXH3_state_t) -# define XXH128_hash_t XXH_IPREF(XXH128_hash_t) - /* Ensure the header is parsed again, even if it was previously included */ -# undef XXHASH_H_5627135585666179 -# undef XXHASH_H_STATIC_13879238742 -#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ - -/* **************************************************************** - * Stable API - *****************************************************************/ -#ifndef XXHASH_H_5627135585666179 -#define XXHASH_H_5627135585666179 1 - -/*! @brief Marks a global symbol. */ -#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) -# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) -# ifdef XXH_EXPORT -# define XXH_PUBLIC_API __declspec(dllexport) -# elif XXH_IMPORT -# define XXH_PUBLIC_API __declspec(dllimport) -# endif -# else -# define XXH_PUBLIC_API /* do nothing */ -# endif -#endif - -#ifdef XXH_NAMESPACE -# define XXH_CAT(A,B) A##B -# define XXH_NAME2(A,B) XXH_CAT(A,B) -# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) -/* XXH32 */ -# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) -# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) -# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) -# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) -# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) -# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) -# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) -# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) -# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) -/* XXH64 */ -# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) -# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) -# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) -# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) -# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) -# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) -# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) -# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) -# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) -/* XXH3_64bits */ -# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits) -# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret) -# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed) -# define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed) -# define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState) -# define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState) -# define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState) -# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset) -# define 
XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed) -# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret) -# define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed) -# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update) -# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest) -# define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret) -# define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed) -/* XXH3_128bits */ -# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128) -# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits) -# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed) -# define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret) -# define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed) -# define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset) -# define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed) -# define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret) -# define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed) -# define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update) -# define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest) -# define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual) -# define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp) -# define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash) -# define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical) -#endif - - -/* ************************************* -* Compiler specifics -***************************************/ - -/* specific declaration modes for Windows */ -#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) -# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) -# ifdef XXH_EXPORT -# define XXH_PUBLIC_API __declspec(dllexport) -# elif XXH_IMPORT -# define XXH_PUBLIC_API __declspec(dllimport) -# endif -# else -# define XXH_PUBLIC_API /* do nothing */ -# endif -#endif - -#if defined (__GNUC__) -# define XXH_CONSTF __attribute__((const)) -# define XXH_PUREF __attribute__((pure)) -# define XXH_MALLOCF __attribute__((malloc)) -#else -# define XXH_CONSTF /* disable */ -# define XXH_PUREF -# define XXH_MALLOCF -#endif - -/* ************************************* -* Version -***************************************/ -#define XXH_VERSION_MAJOR 0 -#define XXH_VERSION_MINOR 8 -#define XXH_VERSION_RELEASE 2 -/*! @brief Version number, encoded as two digits each */ -#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) - -/*! - * @brief Obtains the xxHash version. - * - * This is mostly useful when xxHash is compiled as a shared library, - * since the returned value comes from the library, as opposed to header file. - * - * @return @ref XXH_VERSION_NUMBER of the invoked library. - */ -XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void); - - -/* **************************** -* Common basic types -******************************/ -#include /* size_t */ -/*! - * @brief Exit code for the streaming API. 
- */ -typedef enum { - XXH_OK = 0, /*!< OK */ - XXH_ERROR /*!< Error */ -} XXH_errorcode; - - -/*-********************************************************************** -* 32-bit hash -************************************************************************/ -#if defined(XXH_DOXYGEN) /* Don't show include */ -/*! - * @brief An unsigned 32-bit integer. - * - * Not necessarily defined to `uint32_t` but functionally equivalent. - */ -typedef uint32_t XXH32_hash_t; - -#elif !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint32_t XXH32_hash_t; - -#else -# include -# if UINT_MAX == 0xFFFFFFFFUL - typedef unsigned int XXH32_hash_t; -# elif ULONG_MAX == 0xFFFFFFFFUL - typedef unsigned long XXH32_hash_t; -# else -# error "unsupported platform: need a 32-bit type" -# endif -#endif - -/*! - * @} - * - * @defgroup XXH32_family XXH32 family - * @ingroup public - * Contains functions used in the classic 32-bit xxHash algorithm. - * - * @note - * XXH32 is useful for older platforms, with no or poor 64-bit performance. - * Note that the @ref XXH3_family provides competitive speed for both 32-bit - * and 64-bit systems, and offers true 64/128 bit hash results. - * - * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families - * @see @ref XXH32_impl for implementation details - * @{ - */ - -/*! - * @brief Calculates the 32-bit hash of @p input using xxHash32. - * - * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s - * - * See @ref single_shot_example "Single Shot Example" for an example. - * - * @param input The block of data to be hashed, at least @p length bytes in size. - * @param length The length of @p input, in bytes. - * @param seed The 32-bit seed to alter the hash's output predictably. - * - * @pre - * The memory between @p input and @p input + @p length must be valid, - * readable, contiguous memory. However, if @p length is `0`, @p input may be - * `NULL`. In C++, this also must be *TriviallyCopyable*. - * - * @return The calculated 32-bit hash value. - * - * @see - * XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128(): - * Direct equivalents for the other variants of xxHash. - * @see - * XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version. - */ -XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed); - -#ifndef XXH_NO_STREAM -/*! - * Streaming functions generate the xxHash value from an incremental input. - * This method is slower than single-call functions, due to state management. - * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. - * - * An XXH state must first be allocated using `XXH*_createState()`. - * - * Start a new hash by initializing the state with a seed using `XXH*_reset()`. - * - * Then, feed the hash state by calling `XXH*_update()` as many times as necessary. - * - * The function returns an error code, with 0 meaning OK, and any other value - * meaning there is an error. - * - * Finally, a hash value can be produced anytime, by using `XXH*_digest()`. - * This function returns the nn-bits hash as an int or long long. - * - * It's still possible to continue inserting input into the hash state after a - * digest, and generate new hash values later on by invoking `XXH*_digest()`. - * - * When done, release the state using `XXH*_freeState()`. - * - * @see streaming_example at the top of @ref xxhash.h for an example. - */ - -/*! 
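- * (Editor-added sketch, not upstream text: a typical streaming session,
- *  assuming a caller-supplied `FILE* f`.)
- * @code{.c}
- *     unsigned char buf[4096];
- *     size_t n;
- *     XXH32_state_t* st = XXH32_createState();
- *     XXH32_reset(st, 0);                          // seed = 0
- *     while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
- *         XXH32_update(st, buf, n);                // feed incrementally
- *     XXH32_hash_t h = XXH32_digest(st);           // same as one-shot XXH32()
- *     XXH32_freeState(st);
- * @endcode
- *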
- * @typedef struct XXH32_state_s XXH32_state_t - * @brief The opaque state struct for the XXH32 streaming API. - * - * @see XXH32_state_s for details. - */ -typedef struct XXH32_state_s XXH32_state_t; - -/*! - * @brief Allocates an @ref XXH32_state_t. - * - * Must be freed with XXH32_freeState(). - * @return An allocated XXH32_state_t on success, `NULL` on failure. - */ -XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void); -/*! - * @brief Frees an @ref XXH32_state_t. - * - * Must be allocated with XXH32_createState(). - * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState(). - * @return XXH_OK. - */ -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); -/*! - * @brief Copies one @ref XXH32_state_t to another. - * - * @param dst_state The state to copy to. - * @param src_state The state to copy from. - * @pre - * @p dst_state and @p src_state must not be `NULL` and must not overlap. - */ -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); - -/*! - * @brief Resets an @ref XXH32_state_t to begin a new hash. - * - * This function resets and seeds a state. Call it before @ref XXH32_update(). - * - * @param statePtr The state struct to reset. - * @param seed The 32-bit seed to alter the hash result predictably. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - */ -XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed); - -/*! - * @brief Consumes a block of @p input to an @ref XXH32_state_t. - * - * Call this to incrementally consume blocks of data. - * - * @param statePtr The state struct to update. - * @param input The block of data to be hashed, at least @p length bytes in size. - * @param length The length of @p input, in bytes. - * - * @pre - * @p statePtr must not be `NULL`. - * @pre - * The memory between @p input and @p input + @p length must be valid, - * readable, contiguous memory. However, if @p length is `0`, @p input may be - * `NULL`. In C++, this also must be *TriviallyCopyable*. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - */ -XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); - -/*! - * @brief Returns the calculated hash value from an @ref XXH32_state_t. - * - * @note - * Calling XXH32_digest() will not affect @p statePtr, so you can update, - * digest, and update again. - * - * @param statePtr The state struct to calculate the hash from. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return The calculated xxHash32 value from that state. - */ -XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); -#endif /* !XXH_NO_STREAM */ - -/******* Canonical representation *******/ - -/* - * The default return values from XXH functions are unsigned 32 and 64 bit - * integers. - * This the simplest and fastest format for further post-processing. - * - * However, this leaves open the question of what is the order on the byte level, - * since little and big endian conventions will store the same number differently. - * - * The canonical representation settles this issue by mandating big-endian - * convention, the same convention as human-readable numbers (large digits first). 
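- *
- * (Editor-added sketch, not upstream text: writing a hash in canonical form
- *  and reading it back, assuming caller-supplied `data`, `size`, and `FILE* f`.)
- * @code{.c}
- *     XXH32_canonical_t c;
- *     XXH32_canonicalFromHash(&c, XXH32(data, size, 0));
- *     fwrite(c.digest, 1, sizeof(c.digest), f);    // identical bytes on every platform
- *     // ... later, possibly on a machine with different endianness:
- *     XXH32_hash_t h = XXH32_hashFromCanonical(&c);
- * @endcode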
- * - * When writing hash values to storage, sending them over a network, or printing - * them, it's highly recommended to use the canonical representation to ensure - * portability across a wider range of systems, present and future. - * - * The following functions allow transformation of hash values to and from - * canonical format. - */ - -/*! - * @brief Canonical (big endian) representation of @ref XXH32_hash_t. - */ -typedef struct { - unsigned char digest[4]; /*!< Hash bytes, big endian */ -} XXH32_canonical_t; - -/*! - * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t. - * - * @param dst The @ref XXH32_canonical_t pointer to be stored to. - * @param hash The @ref XXH32_hash_t to be converted. - * - * @pre - * @p dst must not be `NULL`. - */ -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); - -/*! - * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t. - * - * @param src The @ref XXH32_canonical_t to convert. - * - * @pre - * @p src must not be `NULL`. - * - * @return The converted hash. - */ -XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); - - -/*! @cond Doxygen ignores this part */ -#ifdef __has_attribute -# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x) -#else -# define XXH_HAS_ATTRIBUTE(x) 0 -#endif -/*! @endcond */ - -/*! @cond Doxygen ignores this part */ -/* - * C23 __STDC_VERSION__ number hasn't been specified yet. For now - * leave as `201711L` (C17 + 1). - * TODO: Update to correct value when its been specified. - */ -#define XXH_C23_VN 201711L -/*! @endcond */ - -/*! @cond Doxygen ignores this part */ -/* C-language Attributes are added in C23. */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute) -# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x) -#else -# define XXH_HAS_C_ATTRIBUTE(x) 0 -#endif -/*! @endcond */ - -/*! @cond Doxygen ignores this part */ -#if defined(__cplusplus) && defined(__has_cpp_attribute) -# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) -#else -# define XXH_HAS_CPP_ATTRIBUTE(x) 0 -#endif -/*! @endcond */ - -/*! @cond Doxygen ignores this part */ -/* - * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute - * introduced in CPP17 and C23. - * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough - * C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough - */ -#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough) -# define XXH_FALLTHROUGH [[fallthrough]] -#elif XXH_HAS_ATTRIBUTE(__fallthrough__) -# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__)) -#else -# define XXH_FALLTHROUGH /* fallthrough */ -#endif -/*! @endcond */ - -/*! @cond Doxygen ignores this part */ -/* - * Define XXH_NOESCAPE for annotated pointers in public API. - * https://clang.llvm.org/docs/AttributeReference.html#noescape - * As of writing this, only supported by clang. - */ -#if XXH_HAS_ATTRIBUTE(noescape) -# define XXH_NOESCAPE __attribute__((noescape)) -#else -# define XXH_NOESCAPE -#endif -/*! @endcond */ - - -/*! - * @} - * @ingroup public - * @{ - */ - -#ifndef XXH_NO_LONG_LONG -/*-********************************************************************** -* 64-bit hash -************************************************************************/ -#if defined(XXH_DOXYGEN) /* don't include */ -/*! - * @brief An unsigned 64-bit integer. 
- * - * Not necessarily defined to `uint64_t` but functionally equivalent. - */ -typedef uint64_t XXH64_hash_t; -#elif !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint64_t XXH64_hash_t; -#else -# include -# if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL - /* LP64 ABI says uint64_t is unsigned long */ - typedef unsigned long XXH64_hash_t; -# else - /* the following type must have a width of 64-bit */ - typedef unsigned long long XXH64_hash_t; -# endif -#endif - -/*! - * @} - * - * @defgroup XXH64_family XXH64 family - * @ingroup public - * @{ - * Contains functions used in the classic 64-bit xxHash algorithm. - * - * @note - * XXH3 provides competitive speed for both 32-bit and 64-bit systems, - * and offers true 64/128 bit hash results. - * It provides better speed for systems with vector processing capabilities. - */ - -/*! - * @brief Calculates the 64-bit hash of @p input using xxHash64. - * - * This function usually runs faster on 64-bit systems, but slower on 32-bit - * systems (see benchmark). - * - * @param input The block of data to be hashed, at least @p length bytes in size. - * @param length The length of @p input, in bytes. - * @param seed The 64-bit seed to alter the hash's output predictably. - * - * @pre - * The memory between @p input and @p input + @p length must be valid, - * readable, contiguous memory. However, if @p length is `0`, @p input may be - * `NULL`. In C++, this also must be *TriviallyCopyable*. - * - * @return The calculated 64-bit hash. - * - * @see - * XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128(): - * Direct equivalents for the other variants of xxHash. - * @see - * XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed); - -/******* Streaming *******/ -#ifndef XXH_NO_STREAM -/*! - * @brief The opaque state struct for the XXH64 streaming API. - * - * @see XXH64_state_s for details. - */ -typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ - -/*! - * @brief Allocates an @ref XXH64_state_t. - * - * Must be freed with XXH64_freeState(). - * @return An allocated XXH64_state_t on success, `NULL` on failure. - */ -XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void); - -/*! - * @brief Frees an @ref XXH64_state_t. - * - * Must be allocated with XXH64_createState(). - * @param statePtr A pointer to an @ref XXH64_state_t allocated with @ref XXH64_createState(). - * @return XXH_OK. - */ -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); - -/*! - * @brief Copies one @ref XXH64_state_t to another. - * - * @param dst_state The state to copy to. - * @param src_state The state to copy from. - * @pre - * @p dst_state and @p src_state must not be `NULL` and must not overlap. - */ -XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state); - -/*! - * @brief Resets an @ref XXH64_state_t to begin a new hash. - * - * This function resets and seeds a state. Call it before @ref XXH64_update(). - * - * @param statePtr The state struct to reset. - * @param seed The 64-bit seed to alter the hash result predictably. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. 
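- *
- * (Editor-added sketch, not upstream text: one state can be reset and reused
- *  for many hashes, avoiding repeated allocation; `st`, `seed`, `msg` and
- *  `msgLen` are assumed caller-supplied.)
- * @code{.c}
- *     XXH64_reset(st, seed);           // st obtained earlier from XXH64_createState()
- *     XXH64_update(st, msg, msgLen);
- *     XXH64_hash_t h1 = XXH64_digest(st);
- *     XXH64_reset(st, seed);           // start a fresh hash with the same state
- * @endcode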
- */ -XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed); - -/*! - * @brief Consumes a block of @p input to an @ref XXH64_state_t. - * - * Call this to incrementally consume blocks of data. - * - * @param statePtr The state struct to update. - * @param input The block of data to be hashed, at least @p length bytes in size. - * @param length The length of @p input, in bytes. - * - * @pre - * @p statePtr must not be `NULL`. - * @pre - * The memory between @p input and @p input + @p length must be valid, - * readable, contiguous memory. However, if @p length is `0`, @p input may be - * `NULL`. In C++, this also must be *TriviallyCopyable*. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - */ -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length); - -/*! - * @brief Returns the calculated hash value from an @ref XXH64_state_t. - * - * @note - * Calling XXH64_digest() will not affect @p statePtr, so you can update, - * digest, and update again. - * - * @param statePtr The state struct to calculate the hash from. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return The calculated xxHash64 value from that state. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr); -#endif /* !XXH_NO_STREAM */ -/******* Canonical representation *******/ - -/*! - * @brief Canonical (big endian) representation of @ref XXH64_hash_t. - */ -typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t; - -/*! - * @brief Converts an @ref XXH64_hash_t to a big endian @ref XXH64_canonical_t. - * - * @param dst The @ref XXH64_canonical_t pointer to be stored to. - * @param hash The @ref XXH64_hash_t to be converted. - * - * @pre - * @p dst must not be `NULL`. - */ -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash); - -/*! - * @brief Converts an @ref XXH64_canonical_t to a native @ref XXH64_hash_t. - * - * @param src The @ref XXH64_canonical_t to convert. - * - * @pre - * @p src must not be `NULL`. - * - * @return The converted hash. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src); - -#ifndef XXH_NO_XXH3 - -/*! - * @} - * ************************************************************************ - * @defgroup XXH3_family XXH3 family - * @ingroup public - * @{ - * - * XXH3 is a more recent hash algorithm featuring: - * - Improved speed for both small and large inputs - * - True 64-bit and 128-bit outputs - * - SIMD acceleration - * - Improved 32-bit viability - * - * Speed analysis methodology is explained here: - * - * https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html - * - * Compared to XXH64, expect XXH3 to run approximately - * ~2x faster on large inputs and >3x faster on small ones, - * exact differences vary depending on platform. - * - * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic, - * but does not require it. - * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3 - * at competitive speeds, even without vector support. Further details are - * explained in the implementation. 
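- *
- * (Editor-added sketch, not upstream text: the simplest use is one-shot,
- *  with caller-supplied `data` and `size`.)
- * @code{.c}
- *     XXH64_hash_t  h64  = XXH3_64bits(data, size);
- *     XXH128_hash_t h128 = XXH3_128bits(data, size);
- * @endcode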
- * - * XXH3 has a fast scalar implementation, but it also includes accelerated SIMD - * implementations for many common platforms: - * - AVX512 - * - AVX2 - * - SSE2 - * - ARM NEON - * - WebAssembly SIMD128 - * - POWER8 VSX - * - s390x ZVector - * This can be controlled via the @ref XXH_VECTOR macro, but it automatically - * selects the best version according to predefined macros. For the x86 family, an - * automatic runtime dispatcher is included separately in @ref xxh_x86dispatch.c. - * - * XXH3 implementation is portable: - * it has a generic C90 formulation that can be compiled on any platform, - * all implementations generate exactly the same hash value on all platforms. - * Starting from v0.8.0, it's also labelled "stable", meaning that - * any future version will also generate the same hash value. - * - * XXH3 offers 2 variants, _64bits and _128bits. - * - * When only 64 bits are needed, prefer invoking the _64bits variant, as it - * reduces the amount of mixing, resulting in faster speed on small inputs. - * It's also generally simpler to manipulate a scalar return type than a struct. - * - * The API supports one-shot hashing, streaming mode, and custom secrets. - */ -/*-********************************************************************** -* XXH3 64-bit variant -************************************************************************/ - -/*! - * @brief 64-bit unseeded variant of XXH3. - * - * This is equivalent to @ref XXH3_64bits_withSeed() with a seed of 0, however - * it may have slightly better performance due to constant propagation of the - * defaults. - * - * @see - * XXH32(), XXH64(), XXH3_128bits(): equivalent for the other xxHash algorithms - * @see - * XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants - * @see - * XXH3_64bits_reset(), XXH3_64bits_update(), XXH3_64bits_digest(): Streaming version. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length); - -/*! - * @brief 64-bit seeded variant of XXH3 - * - * This variant generates a custom secret on the fly based on default secret - * altered using the `seed` value. - * - * While this operation is decently fast, note that it's not completely free. - * - * @note - * seed == 0 produces the same results as @ref XXH3_64bits(). - * - * @param input The data to hash - * @param length The length - * @param seed The 64-bit seed to alter the state. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed); - -/*! - * The bare minimum size for a custom secret. - * - * @see - * XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(), - * XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret(). - */ -#define XXH3_SECRET_SIZE_MIN 136 - -/*! - * @brief 64-bit variant of XXH3 with a custom "secret". - * - * It's possible to provide any blob of bytes as a "secret" to generate the hash. - * This makes it more difficult for an external actor to prepare an intentional collision. - * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN). - * However, the quality of the secret impacts the dispersion of the hash algorithm. - * Therefore, the secret _must_ look like a bunch of random bytes. - * Avoid "trivial" or structured data such as repeated sequences or a text document. - * Whenever in doubt about the "randomness" of the blob of bytes, - * consider employing "XXH3_generateSecret()" instead (see below). 
- * It will generate a proper high entropy secret derived from the blob of bytes. - * Another advantage of using XXH3_generateSecret() is that - * it guarantees that all bits within the initial blob of bytes - * will impact every bit of the output. - * This is not necessarily the case when using the blob of bytes directly - * because, when hashing _small_ inputs, only a portion of the secret is employed. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize); - - -/******* Streaming *******/ -#ifndef XXH_NO_STREAM -/* - * Streaming requires state maintenance. - * This operation costs memory and CPU. - * As a consequence, streaming is slower than one-shot hashing. - * For better performance, prefer one-shot functions whenever applicable. - */ - -/*! - * @brief The state struct for the XXH3 streaming API. - * - * @see XXH3_state_s for details. - */ -typedef struct XXH3_state_s XXH3_state_t; -XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr); - -/*! - * @brief Copies one @ref XXH3_state_t to another. - * - * @param dst_state The state to copy to. - * @param src_state The state to copy from. - * @pre - * @p dst_state and @p src_state must not be `NULL` and must not overlap. - */ -XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state); - -/*! - * @brief Resets an @ref XXH3_state_t to begin a new hash. - * - * This function resets `statePtr` and generate a secret with default parameters. Call it before @ref XXH3_64bits_update(). - * Digest will be equivalent to `XXH3_64bits()`. - * - * @param statePtr The state struct to reset. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - * - */ -XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr); - -/*! - * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash. - * - * This function resets `statePtr` and generate a secret from `seed`. Call it before @ref XXH3_64bits_update(). - * Digest will be equivalent to `XXH3_64bits_withSeed()`. - * - * @param statePtr The state struct to reset. - * @param seed The 64-bit seed to alter the state. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - * - */ -XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed); - -/*! - * XXH3_64bits_reset_withSecret(): - * `secret` is referenced, it _must outlive_ the hash streaming session. - * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`, - * and the quality of produced hash values depends on secret's entropy - * (secret's content should look like a bunch of random bytes). - * When in doubt about the randomness of a candidate `secret`, - * consider employing `XXH3_generateSecret()` instead (see below). - */ -XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize); - -/*! - * @brief Consumes a block of @p input to an @ref XXH3_state_t. - * - * Call this to incrementally consume blocks of data. - * - * @param statePtr The state struct to update. - * @param input The block of data to be hashed, at least @p length bytes in size. - * @param length The length of @p input, in bytes. 
- * - * @pre - * @p statePtr must not be `NULL`. - * @pre - * The memory between @p input and @p input + @p length must be valid, - * readable, contiguous memory. However, if @p length is `0`, @p input may be - * `NULL`. In C++, this also must be *TriviallyCopyable*. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - */ -XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length); - -/*! - * @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t. - * - * @note - * Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update, - * digest, and update again. - * - * @param statePtr The state struct to calculate the hash from. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return The calculated XXH3 64-bit hash value from that state. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr); -#endif /* !XXH_NO_STREAM */ - -/* note : canonical representation of XXH3 is the same as XXH64 - * since they both produce XXH64_hash_t values */ - - -/*-********************************************************************** -* XXH3 128-bit variant -************************************************************************/ - -/*! - * @brief The return value from 128-bit hashes. - * - * Stored in little endian order, although the fields themselves are in native - * endianness. - */ -typedef struct { - XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */ - XXH64_hash_t high64; /*!< `value >> 64` */ -} XXH128_hash_t; - -/*! - * @brief Unseeded 128-bit variant of XXH3 - * - * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead - * for shorter inputs. - * - * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of 0, however - * it may have slightly better performance due to constant propagation of the - * defaults. - * - * @see - * XXH32(), XXH64(), XXH3_64bits(): equivalent for the other xxHash algorithms - * @see - * XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants - * @see - * XXH3_128bits_reset(), XXH3_128bits_update(), XXH3_128bits_digest(): Streaming version. - */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len); -/*! @brief Seeded 128-bit variant of XXH3. @see XXH3_64bits_withSeed(). */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed); -/*! @brief Custom secret 128-bit variant of XXH3. @see XXH3_64bits_withSecret(). */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize); - -/******* Streaming *******/ -#ifndef XXH_NO_STREAM -/* - * Streaming requires state maintenance. - * This operation costs memory and CPU. - * As a consequence, streaming is slower than one-shot hashing. - * For better performance, prefer one-shot functions whenever applicable. - * - * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits(). - * Use already declared XXH3_createState() and XXH3_freeState(). - * - * All reset and streaming functions have same meaning as their 64-bit counterpart. - */ - -/*! - * @brief Resets an @ref XXH3_state_t to begin a new hash. - * - * This function resets `statePtr` and generate a secret with default parameters. Call it before @ref XXH3_128bits_update(). - * Digest will be equivalent to `XXH3_128bits()`. 
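- *
- * (Editor-added sketch, not upstream text: the usual reset/update/digest
- *  cycle for the 128-bit streaming variant, with caller-supplied buffers
- *  `part1`/`part2`.)
- * @code{.c}
- *     XXH3_state_t* st = XXH3_createState();
- *     XXH3_128bits_reset(st);
- *     XXH3_128bits_update(st, part1, size1);
- *     XXH3_128bits_update(st, part2, size2);
- *     XXH128_hash_t h = XXH3_128bits_digest(st);   // matches the one-shot result
- *     XXH3_freeState(st);
- * @endcode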
- *
- * @param statePtr The state struct to reset.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- *
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
- *
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
-
-/*!
- * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
- *
- * This function resets `statePtr` and generates a secret from `seed`. Call it before @ref XXH3_128bits_update().
- * Digest will be equivalent to `XXH3_128bits_withSeed()`.
- *
- * @param statePtr The state struct to reset.
- * @param seed The 64-bit seed to alter the state.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- *
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
- *
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
-/*! @brief Custom secret 128-bit variant of XXH3. @see XXH3_64bits_reset_withSecret(). */
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
-
-/*!
- * @brief Consumes a block of @p input to an @ref XXH3_state_t.
- *
- * Call this to incrementally consume blocks of data.
- *
- * @param statePtr The state struct to update.
- * @param input The block of data to be hashed, at least @p length bytes in size.
- * @param length The length of @p input, in bytes.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- * @pre
- *   The memory between @p input and @p input + @p length must be valid,
- *   readable, contiguous memory. However, if @p length is `0`, @p input may be
- *   `NULL`. In C++, this also must be *TriviallyCopyable*.
- *
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
-
-/*!
- * @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t.
- *
- * @note
- *   Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update,
- *   digest, and update again.
- *
- * @param statePtr The state struct to calculate the hash from.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- *
- * @return The calculated XXH3 128-bit hash value from that state.
- */
-XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
-#endif /* !XXH_NO_STREAM */
-
-/* The following helper functions make it possible to compare XXH128_hash_t values.
- * Since XXH128_hash_t is a structure, this capability is not offered by the language.
- * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
-
-/*!
- * XXH128_isEqual():
- * Return: 1 if `h1` and `h2` are equal, 0 if they are not.
- */
-XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
-
-/*!
- * @brief Compares two @ref XXH128_hash_t values.
- * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
- *
- * @return >0 if *h128_1  > *h128_2
- *         =0 if *h128_1 == *h128_2
- *         <0 if *h128_1  < *h128_2
- */
-XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
-
-
-/******* Canonical representation *******/
-typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
-
-
-/*!
- * @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t.
- *
- * @param dst The @ref XXH128_canonical_t pointer to be stored to.
- * @param hash The @ref XXH128_hash_t to be converted. - * - * @pre - * @p dst must not be `NULL`. - */ -XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash); - -/*! - * @brief Converts an @ref XXH128_canonical_t to a native @ref XXH128_hash_t. - * - * @param src The @ref XXH128_canonical_t to convert. - * - * @pre - * @p src must not be `NULL`. - * - * @return The converted hash. - */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src); - - -#endif /* !XXH_NO_XXH3 */ -#endif /* XXH_NO_LONG_LONG */ - -/*! - * @} - */ -#endif /* XXHASH_H_5627135585666179 */ - - - -#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) -#define XXHASH_H_STATIC_13879238742 -/* **************************************************************************** - * This section contains declarations which are not guaranteed to remain stable. - * They may change in future versions, becoming incompatible with a different - * version of the library. - * These declarations should only be used with static linking. - * Never use them in association with dynamic linking! - ***************************************************************************** */ - -/* - * These definitions are only present to allow static allocation - * of XXH states, on stack or in a struct, for example. - * Never **ever** access their members directly. - */ - -/*! - * @internal - * @brief Structure for XXH32 streaming API. - * - * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY, - * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is - * an opaque type. This allows fields to safely be changed. - * - * Typedef'd to @ref XXH32_state_t. - * Do not access the members of this struct directly. - * @see XXH64_state_s, XXH3_state_s - */ -struct XXH32_state_s { - XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */ - XXH32_hash_t large_len; /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */ - XXH32_hash_t v[4]; /*!< Accumulator lanes */ - XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */ - XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */ - XXH32_hash_t reserved; /*!< Reserved field. Do not read nor write to it. */ -}; /* typedef'd to XXH32_state_t */ - - -#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */ - -/*! - * @internal - * @brief Structure for XXH64 streaming API. - * - * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY, - * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is - * an opaque type. This allows fields to safely be changed. - * - * Typedef'd to @ref XXH64_state_t. - * Do not access the members of this struct directly. - * @see XXH32_state_s, XXH3_state_s - */ -struct XXH64_state_s { - XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */ - XXH64_hash_t v[4]; /*!< Accumulator lanes */ - XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */ - XXH32_hash_t memsize; /*!< Amount of data in @ref mem64 */ - XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyways*/ - XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. 
*/
-}; /* typedef'd to XXH64_state_t */
-
-#ifndef XXH_NO_XXH3
-
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
-#  include <stdalign.h>
-#  define XXH_ALIGN(n)      alignas(n)
-#elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
-/* In C++ alignas() is a keyword */
-#  define XXH_ALIGN(n)      alignas(n)
-#elif defined(__GNUC__)
-#  define XXH_ALIGN(n)      __attribute__ ((aligned(n)))
-#elif defined(_MSC_VER)
-#  define XXH_ALIGN(n)      __declspec(align(n))
-#else
-#  define XXH_ALIGN(n)   /* disabled */
-#endif
-
-/* Old GCC versions only accept the attribute after the type in structures. */
-#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))   /* C11+ */ \
-    && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
-    && defined(__GNUC__)
-#   define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
-#else
-#   define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
-#endif
-
-/*!
- * @brief The size of the internal XXH3 buffer.
- *
- * This is the optimal update size for incremental hashing.
- *
- * @see XXH3_64bits_update(), XXH3_128bits_update().
- */
-#define XXH3_INTERNALBUFFER_SIZE 256
-
-/*!
- * @internal
- * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
- *
- * This is the size used in @ref XXH3_kSecret and the seeded functions.
- *
- * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
- */
-#define XXH3_SECRET_DEFAULT_SIZE 192
-
-/*!
- * @internal
- * @brief Structure for XXH3 streaming API.
- *
- * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
- * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
- * Otherwise it is an opaque type.
- * Never use this definition in combination with a dynamic library.
- * This allows fields to safely be changed in the future.
- *
- * @note ** This structure has a strict alignment requirement of 64 bytes!! **
- * Do not allocate this with `malloc()` or `new`,
- * it will not be sufficiently aligned.
- * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.
- *
- * Typedef'd to @ref XXH3_state_t.
- * Never access the members of this struct directly.
- *
- * @see XXH3_INITSTATE() for stack initialization.
- * @see XXH3_createState(), XXH3_freeState().
- * @see XXH32_state_s, XXH64_state_s
- */
-struct XXH3_state_s {
-   XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
-       /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
-   XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
-       /*!< Used to store a custom secret generated from a seed. */
-   XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
-       /*!< The internal buffer. @see XXH32_state_s::mem32 */
-   XXH32_hash_t bufferedSize;
-       /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
-   XXH32_hash_t useSeed;
-       /*!< Reserved field. Needed for padding on 64-bit. */
-   size_t nbStripesSoFar;
-       /*!< Number of stripes processed. */
-   XXH64_hash_t totalLen;
-       /*!< Total length hashed. 64-bit even on 32-bit targets. */
-   size_t nbStripesPerBlock;
-       /*!< Number of stripes per block. */
-   size_t secretLimit;
-       /*!< Size of @ref customSecret or @ref extSecret */
-   XXH64_hash_t seed;
-       /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */
-   XXH64_hash_t reserved64;
-       /*!< Reserved field. */
-   const unsigned char* extSecret;
-       /*!< Reference to an external secret for the _withSecret variants, NULL
-        *   for other variants.
*/ - /* note: there may be some padding at the end due to alignment on 64 bytes */ -}; /* typedef'd to XXH3_state_t */ - -#undef XXH_ALIGN_MEMBER - -/*! - * @brief Initializes a stack-allocated `XXH3_state_s`. - * - * When the @ref XXH3_state_t structure is merely emplaced on stack, - * it should be initialized with XXH3_INITSTATE() or a memset() - * in case its first reset uses XXH3_NNbits_reset_withSeed(). - * This init can be omitted if the first reset uses default or _withSecret mode. - * This operation isn't necessary when the state is created with XXH3_createState(). - * Note that this doesn't prepare the state for a streaming operation, - * it's still necessary to use XXH3_NNbits_reset*() afterwards. - */ -#define XXH3_INITSTATE(XXH3_state_ptr) \ - do { \ - XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \ - tmp_xxh3_state_ptr->seed = 0; \ - tmp_xxh3_state_ptr->extSecret = NULL; \ - } while(0) - - -/*! - * simple alias to pre-selected XXH3_128bits variant - */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed); - - -/* === Experimental API === */ -/* Symbols defined below must be considered tied to a specific library version. */ - -/*! - * XXH3_generateSecret(): - * - * Derive a high-entropy secret from any user-defined content, named customSeed. - * The generated secret can be used in combination with `*_withSecret()` functions. - * The `_withSecret()` variants are useful to provide a higher level of protection - * than 64-bit seed, as it becomes much more difficult for an external actor to - * guess how to impact the calculation logic. - * - * The function accepts as input a custom seed of any length and any content, - * and derives from it a high-entropy secret of length @p secretSize into an - * already allocated buffer @p secretBuffer. - * - * The generated secret can then be used with any `*_withSecret()` variant. - * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(), - * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret() - * are part of this list. They all accept a `secret` parameter - * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN) - * _and_ feature very high entropy (consist of random-looking bytes). - * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can - * be employed to ensure proper quality. - * - * @p customSeed can be anything. It can have any size, even small ones, - * and its content can be anything, even "poor entropy" sources such as a bunch - * of zeroes. The resulting `secret` will nonetheless provide all required qualities. - * - * @pre - * - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN - * - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior. - * - * Example code: - * @code{.c} - * #include - * #include - * #include - * #define XXH_STATIC_LINKING_ONLY // expose unstable API - * #include "xxhash.h" - * // Hashes argv[2] using the entropy from argv[1]. 
- *    int main(int argc, char* argv[])
- *    {
- *        char secret[XXH3_SECRET_SIZE_MIN];
- *        if (argc != 3) { return 1; }
- *        XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
- *        XXH64_hash_t h = XXH3_64bits_withSecret(
- *             argv[2], strlen(argv[2]),
- *             secret, sizeof(secret)
- *        );
- *        printf("%016llx\n", (unsigned long long) h);
- *    }
- * @endcode
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
-
-/*!
- * @brief Generates the same secret as the _withSeed() variants.
- *
- * The generated secret can be used in combination with
- * `*_withSecret()` and `_withSecretandSeed()` variants.
- *
- * Example C++ `std::string` hash class:
- * @code{.cpp}
- *    #include <string>
- *    #define XXH_STATIC_LINKING_ONLY // expose unstable API
- *    #include "xxhash.h"
- *    // Slow, seeds each time
- *    class HashSlow {
- *        XXH64_hash_t seed;
- *    public:
- *        HashSlow(XXH64_hash_t s) : seed{s} {}
- *        size_t operator()(const std::string& x) const {
- *            return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
- *        }
- *    };
- *    // Fast, caches the seeded secret for future uses.
- *    class HashFast {
- *        unsigned char secret[XXH3_SECRET_SIZE_MIN];
- *    public:
- *        HashFast(XXH64_hash_t s) {
- *            XXH3_generateSecret_fromSeed(secret, s);
- *        }
- *        size_t operator()(const std::string& x) const {
- *            return size_t{
- *                XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
- *            };
- *        }
- *    };
- * @endcode
- * @param secretBuffer A writable buffer of @ref XXH3_SECRET_SIZE_MIN bytes
- * @param seed The seed to seed the state.
- */
-XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
-
-/*!
- * These variants generate hash values using either
- * @p seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
- * or @p secret for "large" keys (>= XXH3_MIDSIZE_MAX).
- *
- * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
- * `_withSeed()` has to generate the secret on the fly for "large" keys.
- * It's fast, but can be perceptible for "not so large" keys (< 1 KB).
- * `_withSecret()` has to generate the masks on the fly for "small" keys,
- * which requires more instructions than the _withSeed() variants.
- * Therefore, the _withSecretandSeed() variant combines the best of both worlds.
- *
- * When @p secret has been generated by XXH3_generateSecret_fromSeed(),
- * this variant produces *exactly* the same results as the `_withSeed()` variant,
- * hence offering only a pure speed benefit on "large" input,
- * by skipping the need to regenerate the secret for every large input.
- *
- * Another usage scenario is to hash the secret to a 64-bit hash value,
- * for example with XXH3_64bits(), which then becomes the seed,
- * and then employ both the seed and the secret in _withSecretandSeed().
- * On top of speed, an added benefit is that each bit in the secret
- * has a 50% chance to swap each bit in the output, via its impact on the seed.
- *
- * This is not guaranteed when using the secret directly in "small data" scenarios,
- * because only portions of the secret are employed for small data.
- */
-XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
-XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
-                              XXH_NOESCAPE const void* secret, size_t secretSize,
-                              XXH64_hash_t seed);
-/*!
@copydoc XXH3_64bits_withSecretandSeed() */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t -XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, - XXH_NOESCAPE const void* secret, size_t secretSize, - XXH64_hash_t seed64); -#ifndef XXH_NO_STREAM -/*! @copydoc XXH3_64bits_withSecretandSeed() */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, - XXH_NOESCAPE const void* secret, size_t secretSize, - XXH64_hash_t seed64); -/*! @copydoc XXH3_64bits_withSecretandSeed() */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, - XXH_NOESCAPE const void* secret, size_t secretSize, - XXH64_hash_t seed64); -#endif /* !XXH_NO_STREAM */ - -#endif /* !XXH_NO_XXH3 */ -#endif /* XXH_NO_LONG_LONG */ -#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) -# define XXH_IMPLEMENTATION -#endif - -#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */ - - -/* ======================================================================== */ -/* ======================================================================== */ -/* ======================================================================== */ - - -/*-********************************************************************** - * xxHash implementation - *-********************************************************************** - * xxHash's implementation used to be hosted inside xxhash.c. - * - * However, inlining requires implementation to be visible to the compiler, - * hence be included alongside the header. - * Previously, implementation was hosted inside xxhash.c, - * which was then #included when inlining was activated. - * This construction created issues with a few build and install systems, - * as it required xxhash.c to be stored in /include directory. - * - * xxHash implementation is now directly integrated within xxhash.h. - * As a consequence, xxhash.c is no longer needed in /include. - * - * xxhash.c is still available and is still useful. - * In a "normal" setup, when xxhash is not inlined, - * xxhash.h only exposes the prototypes and public symbols, - * while xxhash.c can be built into an object file xxhash.o - * which can then be linked into the final binary. - ************************************************************************/ - -#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \ - || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387) -# define XXH_IMPLEM_13a8737387 - -/* ************************************* -* Tuning parameters -***************************************/ - -/*! - * @defgroup tuning Tuning parameters - * @{ - * - * Various macros to control xxHash's behavior. - */ -#ifdef XXH_DOXYGEN -/*! - * @brief Define this to disable 64-bit code. - * - * Useful if only using the @ref XXH32_family and you have a strict C90 compiler. - */ -# define XXH_NO_LONG_LONG -# undef XXH_NO_LONG_LONG /* don't actually */ -/*! - * @brief Controls how unaligned memory is accessed. - * - * By default, access to unaligned memory is controlled by `memcpy()`, which is - * safe and portable. - * - * Unfortunately, on some target/compiler combinations, the generated assembly - * is sub-optimal. - * - * The below switch allow selection of a different access method - * in the search for improved performance. - * - * @par Possible options: - * - * - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy` - * @par - * Use `memcpy()`. Safe and portable. 
Note that most modern compilers will - * eliminate the function call and treat it as an unaligned access. - * - * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))` - * @par - * Depends on compiler extensions and is therefore not portable. - * This method is safe _if_ your compiler supports it, - * and *generally* as fast or faster than `memcpy`. - * - * - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast - * @par - * Casts directly and dereferences. This method doesn't depend on the - * compiler, but it violates the C standard as it directly dereferences an - * unaligned pointer. It can generate buggy code on targets which do not - * support unaligned memory accesses, but in some circumstances, it's the - * only known way to get the most performance. - * - * - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift - * @par - * Also portable. This can generate the best code on old compilers which don't - * inline small `memcpy()` calls, and it might also be faster on big-endian - * systems which lack a native byteswap instruction. However, some compilers - * will emit literal byteshifts even if the target supports unaligned access. - * - * - * @warning - * Methods 1 and 2 rely on implementation-defined behavior. Use these with - * care, as what works on one compiler/platform/optimization level may cause - * another to read garbage data or even crash. - * - * See https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details. - * - * Prefer these methods in priority order (0 > 3 > 1 > 2) - */ -# define XXH_FORCE_MEMORY_ACCESS 0 - -/*! - * @def XXH_SIZE_OPT - * @brief Controls how much xxHash optimizes for size. - * - * xxHash, when compiled, tends to result in a rather large binary size. This - * is mostly due to heavy usage to forced inlining and constant folding of the - * @ref XXH3_family to increase performance. - * - * However, some developers prefer size over speed. This option can - * significantly reduce the size of the generated code. When using the `-Os` - * or `-Oz` options on GCC or Clang, this is defined to 1 by default, - * otherwise it is defined to 0. - * - * Most of these size optimizations can be controlled manually. - * - * This is a number from 0-2. - * - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed - * comes first. - * - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more - * conservative and disables hacks that increase code size. It implies the - * options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0, - * and @ref XXH3_NEON_LANES == 8 if they are not already defined. - * - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible. - * Performance may cry. For example, the single shot functions just use the - * streaming API. - */ -# define XXH_SIZE_OPT 0 - -/*! - * @def XXH_FORCE_ALIGN_CHECK - * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32() - * and XXH64() only). - * - * This is an important performance trick for architectures without decent - * unaligned memory access performance. - * - * It checks for input alignment, and when conditions are met, uses a "fast - * path" employing direct 32-bit/64-bit reads, resulting in _dramatically - * faster_ read speed. - * - * The check costs one initial branch per hash, which is generally negligible, - * but not zero. - * - * Moreover, it's not useful to generate an additional code path if memory - * access uses the same instruction for both aligned and unaligned - * addresses (e.g. x86 and aarch64). 
- * - * In these cases, the alignment check can be removed by setting this macro to 0. - * Then the code will always use unaligned memory access. - * Align check is automatically disabled on x86, x64, ARM64, and some ARM chips - * which are platforms known to offer good unaligned memory accesses performance. - * - * It is also disabled by default when @ref XXH_SIZE_OPT >= 1. - * - * This option does not affect XXH3 (only XXH32 and XXH64). - */ -# define XXH_FORCE_ALIGN_CHECK 0 - -/*! - * @def XXH_NO_INLINE_HINTS - * @brief When non-zero, sets all functions to `static`. - * - * By default, xxHash tries to force the compiler to inline almost all internal - * functions. - * - * This can usually improve performance due to reduced jumping and improved - * constant folding, but significantly increases the size of the binary which - * might not be favorable. - * - * Additionally, sometimes the forced inlining can be detrimental to performance, - * depending on the architecture. - * - * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the - * compiler full control on whether to inline or not. - * - * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if - * @ref XXH_SIZE_OPT >= 1, this will automatically be defined. - */ -# define XXH_NO_INLINE_HINTS 0 - -/*! - * @def XXH3_INLINE_SECRET - * @brief Determines whether to inline the XXH3 withSecret code. - * - * When the secret size is known, the compiler can improve the performance - * of XXH3_64bits_withSecret() and XXH3_128bits_withSecret(). - * - * However, if the secret size is not known, it doesn't have any benefit. This - * happens when xxHash is compiled into a global symbol. Therefore, if - * @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0. - * - * Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers - * that are *sometimes* force inline on -Og, and it is impossible to automatically - * detect this optimization level. - */ -# define XXH3_INLINE_SECRET 0 - -/*! - * @def XXH32_ENDJMP - * @brief Whether to use a jump for `XXH32_finalize`. - * - * For performance, `XXH32_finalize` uses multiple branches in the finalizer. - * This is generally preferable for performance, - * but depending on exact architecture, a jmp may be preferable. - * - * This setting is only possibly making a difference for very small inputs. - */ -# define XXH32_ENDJMP 0 - -/*! - * @internal - * @brief Redefines old internal names. - * - * For compatibility with code that uses xxHash's internals before the names - * were changed to improve namespacing. There is no other reason to use this. - */ -# define XXH_OLD_NAMES -# undef XXH_OLD_NAMES /* don't actually use, it is ugly. */ - -/*! - * @def XXH_NO_STREAM - * @brief Disables the streaming API. - * - * When xxHash is not inlined and the streaming functions are not used, disabling - * the streaming functions can improve code size significantly, especially with - * the @ref XXH3_family which tends to make constant folded copies of itself. - */ -# define XXH_NO_STREAM -# undef XXH_NO_STREAM /* don't actually */ -#endif /* XXH_DOXYGEN */ -/*! - * @} - */ - -#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ - /* prefer __packed__ structures (method 1) for GCC - * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy - * which for some reason does unaligned loads. 
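-    *
-    * (Editor-added sketch, not upstream text: the access method can also be
-    *  pinned before inclusion, e.g. forcing the portable byteshift path.)
-    *
-    *     #define XXH_FORCE_MEMORY_ACCESS 3   // byteshift, method 3
-    *     #define XXH_IMPLEMENTATION
-    *     #include "xxhash.h"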
*/
-# if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
-# define XXH_FORCE_MEMORY_ACCESS 1
-# endif
-#endif
-
-#ifndef XXH_SIZE_OPT
-   /* default to 1 for -Os or -Oz */
-# if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
-# define XXH_SIZE_OPT 1
-# else
-# define XXH_SIZE_OPT 0
-# endif
-#endif
-
-#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
-   /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
-# if XXH_SIZE_OPT >= 1 || \
-    defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
-    || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */
-# define XXH_FORCE_ALIGN_CHECK 0
-# else
-# define XXH_FORCE_ALIGN_CHECK 1
-# endif
-#endif
-
-#ifndef XXH_NO_INLINE_HINTS
-# if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */
-# define XXH_NO_INLINE_HINTS 1
-# else
-# define XXH_NO_INLINE_HINTS 0
-# endif
-#endif
-
-#ifndef XXH3_INLINE_SECRET
-# if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
-     || !defined(XXH_INLINE_ALL)
-# define XXH3_INLINE_SECRET 0
-# else
-# define XXH3_INLINE_SECRET 1
-# endif
-#endif
-
-#ifndef XXH32_ENDJMP
-/* generally preferable for performance */
-# define XXH32_ENDJMP 0
-#endif
-
-/*!
- * @defgroup impl Implementation
- * @{
- */
-
-
-/* *************************************
-*  Includes & Memory related functions
-***************************************/
-#if defined(XXH_NO_STREAM)
-/* nothing */
-#elif defined(XXH_NO_STDLIB)
-
-/* When requesting to disable any mention of stdlib,
- * the library loses the ability to invoke malloc / free.
- * In practice, it means that functions like `XXH*_createState()`
- * will always fail, and return NULL.
- * This flag is useful in situations where
- * xxhash.h is integrated into some kernel, embedded or limited environment
- * without access to dynamic allocation.
- */
-
-static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
-static void XXH_free(void* p) { (void)p; }
-
-#else
-
-/*
- * Modify the local functions below should you wish to use
- * different memory routines for malloc() and free()
- */
-#include <stdlib.h>
-
-/*!
- * @internal
- * @brief Modify this function to use a different routine than malloc().
- */
-static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
-
-/*!
- * @internal
- * @brief Modify this function to use a different routine than free().
- */
-static void XXH_free(void* p) { free(p); }
-
-#endif /* XXH_NO_STDLIB */
-
-#include <string.h>
-
-/*!
- * @internal
- * @brief Modify this function to use a different routine than memcpy().
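- *
- * For example, a hypothetical override for a freestanding build could look
- * like the following (an illustrative sketch; `my_platform_memcpy` is not
- * part of xxHash):
- * ```
- * static void* XXH_memcpy(void* dest, const void* src, size_t size)
- * {
- *     return my_platform_memcpy(dest, src, size); // assumed platform routine
- * }
- * ```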
- */
-static void* XXH_memcpy(void* dest, const void* src, size_t size)
-{
-    return memcpy(dest,src,size);
-}
-
-#include <limits.h> /* ULLONG_MAX */
-
-
-/* *************************************
-*  Compiler Specific Options
-***************************************/
-#ifdef _MSC_VER /* Visual Studio warning fix */
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-#endif
-
-#if XXH_NO_INLINE_HINTS /* disable inlining hints */
-# if defined(__GNUC__) || defined(__clang__)
-# define XXH_FORCE_INLINE static __attribute__((unused))
-# else
-# define XXH_FORCE_INLINE static
-# endif
-# define XXH_NO_INLINE static
-/* enable inlining hints */
-#elif defined(__GNUC__) || defined(__clang__)
-# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
-# define XXH_NO_INLINE static __attribute__((noinline))
-#elif defined(_MSC_VER) /* Visual Studio */
-# define XXH_FORCE_INLINE static __forceinline
-# define XXH_NO_INLINE static __declspec(noinline)
-#elif defined (__cplusplus) \
-  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
-# define XXH_FORCE_INLINE static inline
-# define XXH_NO_INLINE static
-#else
-# define XXH_FORCE_INLINE static
-# define XXH_NO_INLINE static
-#endif
-
-#if XXH3_INLINE_SECRET
-# define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
-#else
-# define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
-#endif
-
-
-/* *************************************
-*  Debug
-***************************************/
-/*!
- * @ingroup tuning
- * @def XXH_DEBUGLEVEL
- * @brief Sets the debugging level.
- *
- * XXH_DEBUGLEVEL is expected to be defined externally, typically via the
- * compiler's command line options. The value must be a number.
- */
-#ifndef XXH_DEBUGLEVEL
-# ifdef DEBUGLEVEL /* backwards compat */
-# define XXH_DEBUGLEVEL DEBUGLEVEL
-# else
-# define XXH_DEBUGLEVEL 0
-# endif
-#endif
-
-#if (XXH_DEBUGLEVEL>=1)
-# include <assert.h> /* note: can still be disabled with NDEBUG */
-# define XXH_ASSERT(c) assert(c)
-#else
-# if defined(__INTEL_COMPILER)
-# define XXH_ASSERT(c) XXH_ASSUME((unsigned char) (c))
-# else
-# define XXH_ASSERT(c) XXH_ASSUME(c)
-# endif
-#endif
-
-/* note: use after variable declarations */
-#ifndef XXH_STATIC_ASSERT
-# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
-# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
-# elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */
-# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
-# else
-# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
-# endif
-# define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
-#endif
-
-/*!
- * @internal
- * @def XXH_COMPILER_GUARD(var)
- * @brief Used to prevent unwanted optimizations for @p var.
- *
- * It uses an empty GCC inline assembly statement with a register constraint
- * which forces @p var into a general purpose register (e.g. eax, ebx, ecx
- * on x86) and marks it as modified.
- *
- * This is used in a few places to avoid unwanted autovectorization (e.g.
- * XXH32_round()). All vectorization we want is explicit via intrinsics,
- * and _usually_ isn't wanted elsewhere.
- *
- * We also use it to prevent unwanted constant folding for AArch64 in
- * XXH3_initCustomSecret_scalar().
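- *
- * As a standalone illustration of the idea (a hypothetical sketch, not part
- * of the original header):
- * ```
- * static uint32_t round_no_vec(uint32_t v)
- * {
- *     v = (v << 13) | (v >> 19);
- *     __asm__("" : "+r"(v)); // opaque to the optimizer: keeps the loop scalar
- *     return v * 2654435761U;
- * }
- * ```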
- */
-#if defined(__GNUC__) || defined(__clang__)
-# define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
-#else
-# define XXH_COMPILER_GUARD(var) ((void)0)
-#endif
-
-/* Specifically for NEON vectors which use the "w" constraint, on
- * Clang. */
-#if defined(__clang__) && defined(__ARM_ARCH) && !defined(__wasm__)
-# define XXH_COMPILER_GUARD_CLANG_NEON(var) __asm__("" : "+w" (var))
-#else
-# define XXH_COMPILER_GUARD_CLANG_NEON(var) ((void)0)
-#endif
-
-/* *************************************
-*  Basic Types
-***************************************/
-#if !defined (__VMS) \
- && (defined (__cplusplus) \
- || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
-  typedef uint8_t xxh_u8;
-#else
-  typedef unsigned char xxh_u8;
-#endif
-typedef XXH32_hash_t xxh_u32;
-
-#ifdef XXH_OLD_NAMES
-# warning "XXH_OLD_NAMES is planned to be removed starting v0.9. If the program depends on it, consider moving away from it by employing newer type names directly"
-# define BYTE xxh_u8
-# define U8 xxh_u8
-# define U32 xxh_u32
-#endif
-
-/* ***   Memory access   *** */
-
-/*!
- * @internal
- * @fn xxh_u32 XXH_read32(const void* ptr)
- * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
- *
- * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
- *
- * @param ptr The pointer to read from.
- * @return The 32-bit native endian integer from the bytes at @p ptr.
- */
-
-/*!
- * @internal
- * @fn xxh_u32 XXH_readLE32(const void* ptr)
- * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
- *
- * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
- *
- * @param ptr The pointer to read from.
- * @return The 32-bit little endian integer from the bytes at @p ptr.
- */
-
-/*!
- * @internal
- * @fn xxh_u32 XXH_readBE32(const void* ptr)
- * @brief Reads an unaligned 32-bit big endian integer from @p ptr.
- *
- * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
- *
- * @param ptr The pointer to read from.
- * @return The 32-bit big endian integer from the bytes at @p ptr.
- */
-
-/*!
- * @internal
- * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align)
- * @brief Like @ref XXH_readLE32(), but has an option for aligned reads.
- *
- * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
- * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is
- * always @ref XXH_alignment::XXH_unaligned.
- *
- * @param ptr The pointer to read from.
- * @param align Whether @p ptr is aligned.
- * @pre
- *   If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte
- *   aligned.
- * @return The 32-bit little endian integer from the bytes at @p ptr.
- */
-
-#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
-/*
- * Manual byteshift. Best for old compilers which don't inline memcpy.
- * We actually directly use XXH_readLE32 and XXH_readBE32.
- */
-#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
-
-/*
- * Force direct memory access. Only works on CPUs which support unaligned memory
- * access in hardware.
- */
-static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
-
-#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
-
-/*
- * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
- * documentation claimed that it only increased the alignment, but actually it
- * can decrease it on gcc, clang, and icc:
- * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
- * https://gcc.godbolt.org/z/xYez1j67Y.
- */
-#ifdef XXH_OLD_NAMES
-typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
-#endif
-static xxh_u32 XXH_read32(const void* ptr)
-{
-    typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
-    return *((const xxh_unalign32*)ptr);
-}
-
-#else
-
-/*
- * Portable and safe solution. Generally efficient.
- * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
- */
-static xxh_u32 XXH_read32(const void* memPtr)
-{
-    xxh_u32 val;
-    XXH_memcpy(&val, memPtr, sizeof(val));
-    return val;
-}
-
-#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
-
-
-/* ***   Endianness   *** */
-
-/*!
- * @ingroup tuning
- * @def XXH_CPU_LITTLE_ENDIAN
- * @brief Whether the target is little endian.
- *
- * Defined to 1 if the target is little endian, or 0 if it is big endian.
- * It can be defined externally, for example on the compiler command line.
- *
- * If it is not defined,
- * a runtime check (which is usually constant folded) is used instead.
- *
- * @note
- *   This is not necessarily defined to an integer constant.
- *
- * @see XXH_isLittleEndian() for the runtime check.
- */
-#ifndef XXH_CPU_LITTLE_ENDIAN
-/*
- * Try to detect endianness automatically, to avoid the nonstandard behavior
- * in `XXH_isLittleEndian()`
- */
-# if defined(_WIN32) /* Windows is always little endian */ \
-  || defined(__LITTLE_ENDIAN__) \
-  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
-# define XXH_CPU_LITTLE_ENDIAN 1
-# elif defined(__BIG_ENDIAN__) \
-  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-# define XXH_CPU_LITTLE_ENDIAN 0
-# else
-/*!
- * @internal
- * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN.
- *
- * Most compilers will constant fold this.
- */
-static int XXH_isLittleEndian(void)
-{
-    /*
-     * Portable and well-defined behavior.
-     * Don't use static: it is detrimental to performance.
-     */
-    const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };
-    return one.c[0];
-}
-# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
-# endif
-#endif
-
-
-
-
-/* ****************************************
-*  Compiler-specific Functions and Macros
-******************************************/
-#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-
-#ifdef __has_builtin
-# define XXH_HAS_BUILTIN(x) __has_builtin(x)
-#else
-# define XXH_HAS_BUILTIN(x) 0
-#endif
-
-
-
-/*
- * C23 and future versions have standard "unreachable()".
- * Once it has been implemented reliably we can add it as an
- * additional case:
- *
- * ```
- * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
- * #  include <stddef.h>
- * #  ifdef unreachable
- * #    define XXH_UNREACHABLE() unreachable()
- * #  endif
- * #endif
- * ```
- *
- * Note C++23 also has std::unreachable() which can be detected
- * as follows:
- * ```
- * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
- * #  include <utility>
- * #  define XXH_UNREACHABLE() std::unreachable()
- * #endif
- * ```
- * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
- * We don't use that as including `<utility>` in `extern "C"` blocks
- * doesn't work on GCC12
- */
-
-#if XXH_HAS_BUILTIN(__builtin_unreachable)
-# define XXH_UNREACHABLE() __builtin_unreachable()
-
-#elif defined(_MSC_VER)
-# define XXH_UNREACHABLE() __assume(0)
-
-#else
-# define XXH_UNREACHABLE()
-#endif
-
-#if XXH_HAS_BUILTIN(__builtin_assume)
-# define XXH_ASSUME(c) __builtin_assume(c)
-#else
-# define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
-#endif
-
-/*!
- * @internal
- * @def XXH_rotl32(x,r)
- * @brief 32-bit rotate left.
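- *
- * For instance, a quick sanity check of the expected behavior (illustrative):
- * ```
- * XXH_ASSERT(XXH_rotl32(0x80000001U, 1) == 0x00000003U); // top bit wraps to bit 0
- * ```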
- * - * @param x The 32-bit integer to be rotated. - * @param r The number of bits to rotate. - * @pre - * @p r > 0 && @p r < 32 - * @note - * @p x and @p r may be evaluated multiple times. - * @return The rotated result. - */ -#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \ - && XXH_HAS_BUILTIN(__builtin_rotateleft64) -# define XXH_rotl32 __builtin_rotateleft32 -# define XXH_rotl64 __builtin_rotateleft64 -/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */ -#elif defined(_MSC_VER) -# define XXH_rotl32(x,r) _rotl(x,r) -# define XXH_rotl64(x,r) _rotl64(x,r) -#else -# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) -# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) -#endif - -/*! - * @internal - * @fn xxh_u32 XXH_swap32(xxh_u32 x) - * @brief A 32-bit byteswap. - * - * @param x The 32-bit integer to byteswap. - * @return @p x, byteswapped. - */ -#if defined(_MSC_VER) /* Visual Studio */ -# define XXH_swap32 _byteswap_ulong -#elif XXH_GCC_VERSION >= 403 -# define XXH_swap32 __builtin_bswap32 -#else -static xxh_u32 XXH_swap32 (xxh_u32 x) -{ - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff ); -} -#endif - - -/* *************************** -* Memory reads -*****************************/ - -/*! - * @internal - * @brief Enum to indicate whether a pointer is aligned. - */ -typedef enum { - XXH_aligned, /*!< Aligned */ - XXH_unaligned /*!< Possibly unaligned */ -} XXH_alignment; - -/* - * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. - * - * This is ideal for older compilers which don't inline memcpy. - */ -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) - -XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr) -{ - const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; - return bytePtr[0] - | ((xxh_u32)bytePtr[1] << 8) - | ((xxh_u32)bytePtr[2] << 16) - | ((xxh_u32)bytePtr[3] << 24); -} - -XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr) -{ - const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; - return bytePtr[3] - | ((xxh_u32)bytePtr[2] << 8) - | ((xxh_u32)bytePtr[1] << 16) - | ((xxh_u32)bytePtr[0] << 24); -} - -#else -XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); -} - -static xxh_u32 XXH_readBE32(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); -} -#endif - -XXH_FORCE_INLINE xxh_u32 -XXH_readLE32_align(const void* ptr, XXH_alignment align) -{ - if (align==XXH_unaligned) { - return XXH_readLE32(ptr); - } else { - return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr); - } -} - - -/* ************************************* -* Misc -***************************************/ -/*! @ingroup public */ -XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } - - -/* ******************************************************************* -* 32-bit hash functions -*********************************************************************/ -/*! - * @} - * @defgroup XXH32_impl XXH32 implementation - * @ingroup impl - * - * Details on the XXH32 implementation. 
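- *
- * For orientation, a minimal one-shot call of the public API that this
- * section implements (illustrative):
- * ```
- * const char msg[] = "xxhash";
- * XXH32_hash_t const h = XXH32(msg, sizeof(msg) - 1, 0); // 0 == seed
- * ```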
- * @{
- */
- /* #define instead of static const, to be used as initializers */
-#define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */
-#define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */
-#define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */
-#define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */
-#define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */
-
-#ifdef XXH_OLD_NAMES
-# define PRIME32_1 XXH_PRIME32_1
-# define PRIME32_2 XXH_PRIME32_2
-# define PRIME32_3 XXH_PRIME32_3
-# define PRIME32_4 XXH_PRIME32_4
-# define PRIME32_5 XXH_PRIME32_5
-#endif
-
-/*!
- * @internal
- * @brief Normal stripe processing routine.
- *
- * This shuffles the bits so that any bit from @p input impacts several bits in
- * @p acc.
- *
- * @param acc The accumulator lane.
- * @param input The stripe of input to mix.
- * @return The mixed accumulator lane.
- */
-static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
-{
-    acc += input * XXH_PRIME32_2;
-    acc  = XXH_rotl32(acc, 13);
-    acc *= XXH_PRIME32_1;
-#if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
-    /*
-     * UGLY HACK:
-     * A compiler fence is the only thing that prevents GCC and Clang from
-     * autovectorizing the XXH32 loop (pragmas and attributes don't work for some
-     * reason) without globally disabling SSE4.1.
-     *
-     * The reason we want to avoid vectorization is because despite working on
-     * 4 integers at a time, there are multiple factors slowing XXH32 down on
-     * SSE4:
-     * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
-     *   newer chips!) making it slightly slower to multiply four integers at
-     *   once compared to four integers independently. Even when pmulld was
-     *   fastest (Sandy/Ivy Bridge), it is still not worth it to go into SSE
-     *   just to multiply unless doing a long operation.
-     *
-     * - Four instructions are required to rotate,
-     *      movdqa tmp, v  // not required with VEX encoding
-     *      pslld tmp, 13  // tmp <<= 13
-     *      psrld v, 19    // x >>= 19
-     *      por v, tmp     // x |= tmp
-     *   compared to one for scalar:
-     *      roll v, 13     // reliably fast across the board
-     *      shldl v, v, 13 // Sandy Bridge and later prefer this for some reason
-     *
-     * - Instruction level parallelism is actually more beneficial here because
-     *   the SIMD actually serializes this operation: While v1 is rotating, v2
-     *   can load data, while v3 can multiply. SSE forces them to operate
-     *   together.
-     *
-     * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
-     * the loop. NEON is only faster on the A53, and with the newer cores, it is less
-     * than half the speed.
-     *
-     * Additionally, this is used on WASM SIMD128 because it JITs to the same
-     * SIMD instructions and has the same issue.
-     */
-    XXH_COMPILER_GUARD(acc);
-#endif
-    return acc;
-}
-
-/*!
- * @internal
- * @brief Mixes all bits to finalize the hash.
- *
- * The final mix ensures that all input bits have a chance to impact any bit in
- * the output digest, resulting in an unbiased distribution.
- *
- * @param hash The hash to avalanche.
- * @return The avalanched hash.
- */
-static xxh_u32 XXH32_avalanche(xxh_u32 hash)
-{
-    hash ^= hash >> 15;
-    hash *= XXH_PRIME32_2;
-    hash ^= hash >> 13;
-    hash *= XXH_PRIME32_3;
-    hash ^= hash >> 16;
-    return hash;
-}
-
-#define XXH_get32bits(p) XXH_readLE32_align(p, align)
-
-/*!
- * @internal
- * @brief Processes the last 0-15 bytes of @p ptr.
- * - * There may be up to 15 bytes remaining to consume from the input. - * This final stage will digest them to ensure that all input bytes are present - * in the final mix. - * - * @param hash The hash to finalize. - * @param ptr The pointer to the remaining input. - * @param len The remaining length, modulo 16. - * @param align Whether @p ptr is aligned. - * @return The finalized hash. - * @see XXH64_finalize(). - */ -static XXH_PUREF xxh_u32 -XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align) -{ -#define XXH_PROCESS1 do { \ - hash += (*ptr++) * XXH_PRIME32_5; \ - hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \ -} while (0) - -#define XXH_PROCESS4 do { \ - hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \ - ptr += 4; \ - hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \ -} while (0) - - if (ptr==NULL) XXH_ASSERT(len == 0); - - /* Compact rerolled version; generally faster */ - if (!XXH32_ENDJMP) { - len &= 15; - while (len >= 4) { - XXH_PROCESS4; - len -= 4; - } - while (len > 0) { - XXH_PROCESS1; - --len; - } - return XXH32_avalanche(hash); - } else { - switch(len&15) /* or switch(bEnd - p) */ { - case 12: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 8: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 4: XXH_PROCESS4; - return XXH32_avalanche(hash); - - case 13: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 9: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 5: XXH_PROCESS4; - XXH_PROCESS1; - return XXH32_avalanche(hash); - - case 14: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 10: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 6: XXH_PROCESS4; - XXH_PROCESS1; - XXH_PROCESS1; - return XXH32_avalanche(hash); - - case 15: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 11: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 7: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 3: XXH_PROCESS1; - XXH_FALLTHROUGH; /* fallthrough */ - case 2: XXH_PROCESS1; - XXH_FALLTHROUGH; /* fallthrough */ - case 1: XXH_PROCESS1; - XXH_FALLTHROUGH; /* fallthrough */ - case 0: return XXH32_avalanche(hash); - } - XXH_ASSERT(0); - return hash; /* reaching this point is deemed impossible */ - } -} - -#ifdef XXH_OLD_NAMES -# define PROCESS1 XXH_PROCESS1 -# define PROCESS4 XXH_PROCESS4 -#else -# undef XXH_PROCESS1 -# undef XXH_PROCESS4 -#endif - -/*! - * @internal - * @brief The implementation for @ref XXH32(). - * - * @param input , len , seed Directly passed from @ref XXH32(). - * @param align Whether @p input is aligned. - * @return The calculated hash. - */ -XXH_FORCE_INLINE XXH_PUREF xxh_u32 -XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align) -{ - xxh_u32 h32; - - if (input==NULL) XXH_ASSERT(len == 0); - - if (len>=16) { - const xxh_u8* const bEnd = input + len; - const xxh_u8* const limit = bEnd - 15; - xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2; - xxh_u32 v2 = seed + XXH_PRIME32_2; - xxh_u32 v3 = seed + 0; - xxh_u32 v4 = seed - XXH_PRIME32_1; - - do { - v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4; - v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4; - v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4; - v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4; - } while (input < limit); - - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) - + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } else { - h32 = seed + XXH_PRIME32_5; - } - - h32 += (xxh_u32)len; - - return XXH32_finalize(h32, input, len&15, align); -} - -/*! 
@ingroup XXH32_family */ -XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed) -{ -#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH32_state_t state; - XXH32_reset(&state, seed); - XXH32_update(&state, (const xxh_u8*)input, len); - return XXH32_digest(&state); -#else - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ - return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); - } } - - return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); -#endif -} - - - -/******* Hash streaming *******/ -#ifndef XXH_NO_STREAM -/*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) -{ - return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); -} -/*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} - -/*! @ingroup XXH32_family */ -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) -{ - XXH_memcpy(dstState, srcState, sizeof(*dstState)); -} - -/*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed) -{ - XXH_ASSERT(statePtr != NULL); - memset(statePtr, 0, sizeof(*statePtr)); - statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2; - statePtr->v[1] = seed + XXH_PRIME32_2; - statePtr->v[2] = seed + 0; - statePtr->v[3] = seed - XXH_PRIME32_1; - return XXH_OK; -} - - -/*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH_errorcode -XXH32_update(XXH32_state_t* state, const void* input, size_t len) -{ - if (input==NULL) { - XXH_ASSERT(len == 0); - return XXH_OK; - } - - { const xxh_u8* p = (const xxh_u8*)input; - const xxh_u8* const bEnd = p + len; - - state->total_len_32 += (XXH32_hash_t)len; - state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16)); - - if (state->memsize + len < 16) { /* fill in tmp buffer */ - XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len); - state->memsize += (XXH32_hash_t)len; - return XXH_OK; - } - - if (state->memsize) { /* some data left from previous update */ - XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize); - { const xxh_u32* p32 = state->mem32; - state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++; - state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++; - state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++; - state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32)); - } - p += 16-state->memsize; - state->memsize = 0; - } - - if (p <= bEnd-16) { - const xxh_u8* const limit = bEnd - 16; - - do { - state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4; - state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4; - state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4; - state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4; - } while (p<=limit); - - } - - if (p < bEnd) { - XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } - } - - return XXH_OK; -} - - -/*! 
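- * A streaming usage sketch for the functions above (illustrative; error
- * handling omitted, and `part1`/`part2` stand in for caller buffers):
- * ```
- * XXH32_state_t* const s = XXH32_createState();
- * XXH32_reset(s, 0);
- * XXH32_update(s, part1, len1);
- * XXH32_update(s, part2, len2);
- * XXH32_hash_t const h = XXH32_digest(s);
- * XXH32_freeState(s);
- * ```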
@ingroup XXH32_family */ -XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state) -{ - xxh_u32 h32; - - if (state->large_len) { - h32 = XXH_rotl32(state->v[0], 1) - + XXH_rotl32(state->v[1], 7) - + XXH_rotl32(state->v[2], 12) - + XXH_rotl32(state->v[3], 18); - } else { - h32 = state->v[2] /* == seed */ + XXH_PRIME32_5; - } - - h32 += state->total_len_32; - - return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned); -} -#endif /* !XXH_NO_STREAM */ - -/******* Canonical representation *******/ - -/*! - * @ingroup XXH32_family - * The default return values from XXH functions are unsigned 32 and 64 bit - * integers. - * - * The canonical representation uses big endian convention, the same convention - * as human-readable numbers (large digits first). - * - * This way, hash values can be written into a file or buffer, remaining - * comparable across different systems. - * - * The following functions allow transformation of hash values to and from their - * canonical format. - */ -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); - XXH_memcpy(dst, &hash, sizeof(*dst)); -} -/*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) -{ - return XXH_readBE32(src); -} - - -#ifndef XXH_NO_LONG_LONG - -/* ******************************************************************* -* 64-bit hash functions -*********************************************************************/ -/*! - * @} - * @ingroup impl - * @{ - */ -/******* Memory access *******/ - -typedef XXH64_hash_t xxh_u64; - -#ifdef XXH_OLD_NAMES -# define U64 xxh_u64 -#endif - -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) -/* - * Manual byteshift. Best for old compilers which don't inline memcpy. - * We actually directly use XXH_readLE64 and XXH_readBE64. - */ -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) - -/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ -static xxh_u64 XXH_read64(const void* memPtr) -{ - return *(const xxh_u64*) memPtr; -} - -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) - -/* - * __attribute__((aligned(1))) is supported by gcc and clang. Originally the - * documentation claimed that it only increased the alignment, but actually it - * can decrease it on gcc, clang, and icc: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502, - * https://gcc.godbolt.org/z/xYez1j67Y. - */ -#ifdef XXH_OLD_NAMES -typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64; -#endif -static xxh_u64 XXH_read64(const void* ptr) -{ - typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64; - return *((const xxh_unalign64*)ptr); -} - -#else - -/* - * Portable and safe solution. Generally efficient. 
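- * A fixed-size memcpy like this is typically compiled down to a single
- * (unaligned) load on mainstream targets; e.g. a hypothetical standalone
- * equivalent:
- * ```
- * uint64_t load64(const void* p) { uint64_t v; memcpy(&v, p, sizeof v); return v; }
- * ```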
- * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
- */
-static xxh_u64 XXH_read64(const void* memPtr)
-{
-    xxh_u64 val;
-    XXH_memcpy(&val, memPtr, sizeof(val));
-    return val;
-}
-
-#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
-
-#if defined(_MSC_VER) /* Visual Studio */
-# define XXH_swap64 _byteswap_uint64
-#elif XXH_GCC_VERSION >= 403
-# define XXH_swap64 __builtin_bswap64
-#else
-static xxh_u64 XXH_swap64(xxh_u64 x)
-{
-    return ((x << 56) & 0xff00000000000000ULL) |
-           ((x << 40) & 0x00ff000000000000ULL) |
-           ((x << 24) & 0x0000ff0000000000ULL) |
-           ((x << 8)  & 0x000000ff00000000ULL) |
-           ((x >> 8)  & 0x00000000ff000000ULL) |
-           ((x >> 24) & 0x0000000000ff0000ULL) |
-           ((x >> 40) & 0x000000000000ff00ULL) |
-           ((x >> 56) & 0x00000000000000ffULL);
-}
-#endif
-
-
-/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
-#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
-
-XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
-{
-    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
-    return bytePtr[0]
-         | ((xxh_u64)bytePtr[1] << 8)
-         | ((xxh_u64)bytePtr[2] << 16)
-         | ((xxh_u64)bytePtr[3] << 24)
-         | ((xxh_u64)bytePtr[4] << 32)
-         | ((xxh_u64)bytePtr[5] << 40)
-         | ((xxh_u64)bytePtr[6] << 48)
-         | ((xxh_u64)bytePtr[7] << 56);
-}
-
-XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
-{
-    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
-    return bytePtr[7]
-         | ((xxh_u64)bytePtr[6] << 8)
-         | ((xxh_u64)bytePtr[5] << 16)
-         | ((xxh_u64)bytePtr[4] << 24)
-         | ((xxh_u64)bytePtr[3] << 32)
-         | ((xxh_u64)bytePtr[2] << 40)
-         | ((xxh_u64)bytePtr[1] << 48)
-         | ((xxh_u64)bytePtr[0] << 56);
-}
-
-#else
-XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
-{
-    return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
-}
-
-static xxh_u64 XXH_readBE64(const void* ptr)
-{
-    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
-}
-#endif
-
-XXH_FORCE_INLINE xxh_u64
-XXH_readLE64_align(const void* ptr, XXH_alignment align)
-{
-    if (align==XXH_unaligned)
-        return XXH_readLE64(ptr);
-    else
-        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
-}
-
-
-/*******   xxh64   *******/
-/*!
- * @}
- * @defgroup XXH64_impl XXH64 implementation
- * @ingroup impl
- *
- * Details on the XXH64 implementation.
- * @{
- */
-/* #define rather than static const, to be used as initializers */
-#define XXH_PRIME64_1 0x9E3779B185EBCA87ULL /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
-#define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
-#define XXH_PRIME64_3 0x165667B19E3779F9ULL /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
-#define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
-#define XXH_PRIME64_5 0x27D4EB2F165667C5ULL /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
-
-#ifdef XXH_OLD_NAMES
-# define PRIME64_1 XXH_PRIME64_1
-# define PRIME64_2 XXH_PRIME64_2
-# define PRIME64_3 XXH_PRIME64_3
-# define PRIME64_4 XXH_PRIME64_4
-# define PRIME64_5 XXH_PRIME64_5
-#endif
-
-/*!
@copydoc XXH32_round */
-static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
-{
-    acc += input * XXH_PRIME64_2;
-    acc  = XXH_rotl64(acc, 31);
-    acc *= XXH_PRIME64_1;
-    return acc;
-}
-
-static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
-{
-    val  = XXH64_round(0, val);
-    acc ^= val;
-    acc  = acc * XXH_PRIME64_1 + XXH_PRIME64_4;
-    return acc;
-}
-
-/*! @copydoc XXH32_avalanche */
-static xxh_u64 XXH64_avalanche(xxh_u64 hash)
-{
-    hash ^= hash >> 33;
-    hash *= XXH_PRIME64_2;
-    hash ^= hash >> 29;
-    hash *= XXH_PRIME64_3;
-    hash ^= hash >> 32;
-    return hash;
-}
-
-
-#define XXH_get64bits(p) XXH_readLE64_align(p, align)
-
-/*!
- * @internal
- * @brief Processes the last 0-31 bytes of @p ptr.
- *
- * There may be up to 31 bytes remaining to consume from the input.
- * This final stage will digest them to ensure that all input bytes are present
- * in the final mix.
- *
- * @param hash The hash to finalize.
- * @param ptr The pointer to the remaining input.
- * @param len The remaining length, modulo 32.
- * @param align Whether @p ptr is aligned.
- * @return The finalized hash.
- * @see XXH32_finalize().
- */
-static XXH_PUREF xxh_u64
-XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
-{
-    if (ptr==NULL) XXH_ASSERT(len == 0);
-    len &= 31;
-    while (len >= 8) {
-        xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
-        ptr += 8;
-        hash ^= k1;
-        hash  = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
-        len -= 8;
-    }
-    if (len >= 4) {
-        hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
-        ptr += 4;
-        hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
-        len -= 4;
-    }
-    while (len > 0) {
-        hash ^= (*ptr++) * XXH_PRIME64_5;
-        hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
-        --len;
-    }
-    return XXH64_avalanche(hash);
-}
-
-#ifdef XXH_OLD_NAMES
-# define PROCESS1_64 XXH_PROCESS1_64
-# define PROCESS4_64 XXH_PROCESS4_64
-# define PROCESS8_64 XXH_PROCESS8_64
-#else
-# undef XXH_PROCESS1_64
-# undef XXH_PROCESS4_64
-# undef XXH_PROCESS8_64
-#endif
-
-/*!
- * @internal
- * @brief The implementation for @ref XXH64().
- *
- * @param input , len , seed Directly passed from @ref XXH64().
- * @param align Whether @p input is aligned.
- * @return The calculated hash.
- */
-XXH_FORCE_INLINE XXH_PUREF xxh_u64
-XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
-{
-    xxh_u64 h64;
-    if (input==NULL) XXH_ASSERT(len == 0);
-
-    if (len>=32) {
-        const xxh_u8* const bEnd = input + len;
-        const xxh_u8* const limit = bEnd - 31;
-        xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
-        xxh_u64 v2 = seed + XXH_PRIME64_2;
-        xxh_u64 v3 = seed + 0;
-        xxh_u64 v4 = seed - XXH_PRIME64_1;
-
-        do {
-            v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
-            v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
-            v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
-            v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
-        } while (input<limit);
-
-        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7)
-            + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
-        h64 = XXH64_mergeRound(h64, v1);
-        h64 = XXH64_mergeRound(h64, v2);
-        h64 = XXH64_mergeRound(h64, v3);
-        h64 = XXH64_mergeRound(h64, v4);
-
-    } else {
-        h64  = seed + XXH_PRIME64_5;
-    }
-
-    h64 += (xxh_u64) len;
-
-    return XXH64_finalize(h64, input, len, align);
-}
-
-/*! @ingroup XXH64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
-{
-#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
-    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
-    XXH64_state_t state;
-    XXH64_reset(&state, seed);
-    XXH64_update(&state, (const xxh_u8*)input, len);
-    return XXH64_digest(&state);
-#else
-    if (XXH_FORCE_ALIGN_CHECK) {
-        if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
-            return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
-    }   }
-
-    return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
-
-#endif
-}
-
-/*******   Hash Streaming   *******/
-#ifndef XXH_NO_STREAM
-/*!
@ingroup XXH64_family*/ -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) -{ - return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); -} -/*! @ingroup XXH64_family */ -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} - -/*! @ingroup XXH64_family */ -XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState) -{ - XXH_memcpy(dstState, srcState, sizeof(*dstState)); -} - -/*! @ingroup XXH64_family */ -XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed) -{ - XXH_ASSERT(statePtr != NULL); - memset(statePtr, 0, sizeof(*statePtr)); - statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2; - statePtr->v[1] = seed + XXH_PRIME64_2; - statePtr->v[2] = seed + 0; - statePtr->v[3] = seed - XXH_PRIME64_1; - return XXH_OK; -} - -/*! @ingroup XXH64_family */ -XXH_PUBLIC_API XXH_errorcode -XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len) -{ - if (input==NULL) { - XXH_ASSERT(len == 0); - return XXH_OK; - } - - { const xxh_u8* p = (const xxh_u8*)input; - const xxh_u8* const bEnd = p + len; - - state->total_len += len; - - if (state->memsize + len < 32) { /* fill in tmp buffer */ - XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len); - state->memsize += (xxh_u32)len; - return XXH_OK; - } - - if (state->memsize) { /* tmp buffer is full */ - XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize); - state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0)); - state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1)); - state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2)); - state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3)); - p += 32 - state->memsize; - state->memsize = 0; - } - - if (p+32 <= bEnd) { - const xxh_u8* const limit = bEnd - 32; - - do { - state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8; - state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8; - state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8; - state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8; - } while (p<=limit); - - } - - if (p < bEnd) { - XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } - } - - return XXH_OK; -} - - -/*! @ingroup XXH64_family */ -XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state) -{ - xxh_u64 h64; - - if (state->total_len >= 32) { - h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18); - h64 = XXH64_mergeRound(h64, state->v[0]); - h64 = XXH64_mergeRound(h64, state->v[1]); - h64 = XXH64_mergeRound(h64, state->v[2]); - h64 = XXH64_mergeRound(h64, state->v[3]); - } else { - h64 = state->v[2] /*seed*/ + XXH_PRIME64_5; - } - - h64 += (xxh_u64) state->total_len; - - return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned); -} -#endif /* !XXH_NO_STREAM */ - -/******* Canonical representation *******/ - -/*! @ingroup XXH64_family */ -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); - XXH_memcpy(dst, &hash, sizeof(*dst)); -} - -/*! 
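- * Canonical round-trip sketch for the pair of functions below (illustrative;
- * `h` stands for a previously computed XXH64_hash_t):
- * ```
- * XXH64_canonical_t c;
- * XXH64_canonicalFromHash(&c, h);                       // big endian wire form
- * XXH64_hash_t const h2 = XXH64_hashFromCanonical(&c);  // h2 == h
- * ```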
@ingroup XXH64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
-{
-    return XXH_readBE64(src);
-}
-
-#ifndef XXH_NO_XXH3
-
-/* *********************************************************************
-*  XXH3
-*  New generation hash designed for speed on small keys and vectorization
-************************************************************************ */
-/*!
- * @}
- * @defgroup XXH3_impl XXH3 implementation
- * @ingroup impl
- * @{
- */
-
-/* ===   Compiler specifics   === */
-
-#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
-# define XXH_RESTRICT /* disable */
-#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */
-# define XXH_RESTRICT restrict
-#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
-   || (defined (__clang__)) \
-   || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
-   || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
-/*
- * There are a LOT more compilers that recognize __restrict but this
- * covers the major ones.
- */
-# define XXH_RESTRICT __restrict
-#else
-# define XXH_RESTRICT /* disable */
-#endif
-
-#if (defined(__GNUC__) && (__GNUC__ >= 3)) \
-  || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
-  || defined(__clang__)
-# define XXH_likely(x) __builtin_expect(x, 1)
-# define XXH_unlikely(x) __builtin_expect(x, 0)
-#else
-# define XXH_likely(x) (x)
-# define XXH_unlikely(x) (x)
-#endif
-
-#ifndef XXH_HAS_INCLUDE
-# ifdef __has_include
-# define XXH_HAS_INCLUDE(x) __has_include(x)
-# else
-# define XXH_HAS_INCLUDE(x) 0
-# endif
-#endif
-
-#if defined(__GNUC__) || defined(__clang__)
-# if defined(__ARM_FEATURE_SVE)
-# include <arm_sve.h>
-# endif
-# if defined(__ARM_NEON__) || defined(__ARM_NEON) \
-  || (defined(_M_ARM) && _M_ARM >= 7) \
-  || defined(_M_ARM64) || defined(_M_ARM64EC) \
-  || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* WASM SIMD128 via SIMDe */
-# define inline __inline__ /* circumvent a clang bug */
-# include <arm_neon.h>
-# undef inline
-# elif defined(__AVX2__)
-# include <immintrin.h>
-# elif defined(__SSE2__)
-# include <emmintrin.h>
-# endif
-#endif
-
-#if defined(_MSC_VER)
-# include <intrin.h>
-#endif
-
-/*
- * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
- * remaining a true 64-bit/128-bit hash function.
- *
- * This is done by prioritizing a subset of 64-bit operations that can be
- * emulated without too many steps on the average 32-bit machine.
- *
- * For example, these two lines seem similar, and run equally fast on 64-bit:
- *
- *   xxh_u64 x;
- *   x ^= (x >> 47); // good
- *   x ^= (x >> 13); // bad
- *
- * However, to a 32-bit machine, there is a major difference.
- *
- * x ^= (x >> 47) looks like this:
- *
- *   x.lo ^= (x.hi >> (47 - 32));
- *
- * while x ^= (x >> 13) looks like this:
- *
- *   // note: funnel shifts are not usually cheap.
- *   x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
- *   x.hi ^= (x.hi >> 13);
- *
- * The first one is significantly faster than the second, simply because the
- * shift is larger than 32. This means:
- *   - All the bits we need are in the upper 32 bits, so we can ignore the lower
- *     32 bits in the shift.
- *   - The shift result will always fit in the lower 32 bits, and therefore,
- *     we can ignore the upper 32 bits in the xor.
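- *
- * Spelled out as a hypothetical 32-bit emulation (illustrative only):
- *
- *   typedef struct { uint32_t lo, hi; } u64_emu;
- *   static void xorshift47(u64_emu* x) { x->lo ^= (x->hi >> 15); } // 47 - 32 == 15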
- * - * Thanks to this optimization, XXH3 only requires these features to be efficient: - * - * - Usable unaligned access - * - A 32-bit or 64-bit ALU - * - If 32-bit, a decent ADC instruction - * - A 32 or 64-bit multiply with a 64-bit result - * - For the 128-bit variant, a decent byteswap helps short inputs. - * - * The first two are already required by XXH32, and almost all 32-bit and 64-bit - * platforms which can run XXH32 can run XXH3 efficiently. - * - * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one - * notable exception. - * - * First of all, Thumb-1 lacks support for the UMULL instruction which - * performs the important long multiply. This means numerous __aeabi_lmul - * calls. - * - * Second of all, the 8 functional registers are just not enough. - * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need - * Lo registers, and this shuffling results in thousands more MOVs than A32. - * - * A32 and T32 don't have this limitation. They can access all 14 registers, - * do a 32->64 multiply with UMULL, and the flexible operand allowing free - * shifts is helpful, too. - * - * Therefore, we do a quick sanity check. - * - * If compiling Thumb-1 for a target which supports ARM instructions, we will - * emit a warning, as it is not a "sane" platform to compile for. - * - * Usually, if this happens, it is because of an accident and you probably need - * to specify -march, as you likely meant to compile for a newer architecture. - * - * Credit: large sections of the vectorial and asm source code paths - * have been contributed by @easyaspi314 - */ -#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM) -# warning "XXH3 is highly inefficient without ARM or Thumb-2." -#endif - -/* ========================================== - * Vectorization detection - * ========================================== */ - -#ifdef XXH_DOXYGEN -/*! - * @ingroup tuning - * @brief Overrides the vectorization implementation chosen for XXH3. - * - * Can be defined to 0 to disable SIMD or any of the values mentioned in - * @ref XXH_VECTOR_TYPE. - * - * If this is not defined, it uses predefined macros to determine the best - * implementation. - */ -# define XXH_VECTOR XXH_SCALAR -/*! - * @ingroup tuning - * @brief Possible values for @ref XXH_VECTOR. - * - * Note that these are actually implemented as macros. - * - * If this is not defined, it is detected automatically. - * internal macro XXH_X86DISPATCH overrides this. - */ -enum XXH_VECTOR_TYPE /* fake enum */ { - XXH_SCALAR = 0, /*!< Portable scalar version */ - XXH_SSE2 = 1, /*!< - * SSE2 for Pentium 4, Opteron, all x86_64. - * - * @note SSE2 is also guaranteed on Windows 10, macOS, and - * Android x86. - */ - XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */ - XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */ - XXH_NEON = 4, /*!< - * NEON for most ARMv7-A, all AArch64, and WASM SIMD128 - * via the SIMDeverywhere polyfill provided with the - * Emscripten SDK. - */ - XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */ - XXH_SVE = 6, /*!< SVE for some ARMv8-A and ARMv9-A */ -}; -/*! - * @ingroup tuning - * @brief Selects the minimum alignment for XXH3's accumulators. - * - * When using SIMD, this should match the alignment required for said vector - * type, so, for example, 32 for AVX2. - * - * Default: Auto detected. 
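- *
- * E.g., a hypothetical manual override for an AVX2 build could pass
- * `-DXXH_VECTOR=XXH_AVX2 -DXXH_ACC_ALIGN=32` on the compiler command line.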
- */
-# define XXH_ACC_ALIGN 8
-#endif
-
-/* Actual definition */
-#ifndef XXH_DOXYGEN
-# define XXH_SCALAR 0
-# define XXH_SSE2   1
-# define XXH_AVX2   2
-# define XXH_AVX512 3
-# define XXH_NEON   4
-# define XXH_VSX    5
-# define XXH_SVE    6
-#endif
-
-#ifndef XXH_VECTOR /* can be defined on command line */
-# if defined(__ARM_FEATURE_SVE)
-# define XXH_VECTOR XXH_SVE
-# elif ( \
-        defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
-     || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
-     || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* wasm simd128 via SIMDe */ \
-   ) && ( \
-        defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
-    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
-   )
-# define XXH_VECTOR XXH_NEON
-# elif defined(__AVX512F__)
-# define XXH_VECTOR XXH_AVX512
-# elif defined(__AVX2__)
-# define XXH_VECTOR XXH_AVX2
-# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
-# define XXH_VECTOR XXH_SSE2
-# elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
-   || (defined(__s390x__) && defined(__VEC__)) \
-   && defined(__GNUC__) /* TODO: IBM XL */
-# define XXH_VECTOR XXH_VSX
-# else
-# define XXH_VECTOR XXH_SCALAR
-# endif
-#endif
-
-/* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
-#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
-# ifdef _MSC_VER
-# pragma warning(once : 4606)
-# else
-# warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
-# endif
-# undef XXH_VECTOR
-# define XXH_VECTOR XXH_SCALAR
-#endif
-
-/*
- * Controls the alignment of the accumulator,
- * for compatibility with aligned vector loads, which are usually faster.
- */
-#ifndef XXH_ACC_ALIGN
-# if defined(XXH_X86DISPATCH)
-# define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */
-# elif XXH_VECTOR == XXH_SCALAR /* scalar */
-# define XXH_ACC_ALIGN 8
-# elif XXH_VECTOR == XXH_SSE2 /* sse2 */
-# define XXH_ACC_ALIGN 16
-# elif XXH_VECTOR == XXH_AVX2 /* avx2 */
-# define XXH_ACC_ALIGN 32
-# elif XXH_VECTOR == XXH_NEON /* neon */
-# define XXH_ACC_ALIGN 16
-# elif XXH_VECTOR == XXH_VSX /* vsx */
-# define XXH_ACC_ALIGN 16
-# elif XXH_VECTOR == XXH_AVX512 /* avx512 */
-# define XXH_ACC_ALIGN 64
-# elif XXH_VECTOR == XXH_SVE /* sve */
-# define XXH_ACC_ALIGN 64
-# endif
-#endif
-
-#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
-    || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
-# define XXH_SEC_ALIGN XXH_ACC_ALIGN
-#elif XXH_VECTOR == XXH_SVE
-# define XXH_SEC_ALIGN XXH_ACC_ALIGN
-#else
-# define XXH_SEC_ALIGN 8
-#endif
-
-#if defined(__GNUC__) || defined(__clang__)
-# define XXH_ALIASING __attribute__((may_alias))
-#else
-# define XXH_ALIASING /* nothing */
-#endif
-
-/*
- * UGLY HACK:
- * GCC usually generates the best code with -O3 for xxHash.
- *
- * However, when targeting AVX2, it is overzealous in its unrolling resulting
- * in code roughly 3/4 the speed of Clang.
- *
- * There are other issues, such as GCC splitting _mm256_loadu_si256 into
- * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
- * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
- *
- * That is why when compiling the AVX2 version, it is recommended to use either
- *   -O2 -mavx2 -march=haswell
- * or
- *   -O2 -mavx2 -mno-avx256-split-unaligned-load
- * for decent performance, or to use Clang instead.
- *
- * Fortunately, we can control the first one with a pragma that forces GCC into
- * -O2, but the other one we can't control without "failed to inline always
- * inline function due to target mismatch" warnings.
- */
-#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
-  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
-  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
-# pragma GCC push_options
-# pragma GCC optimize("-O2")
-#endif
-
-#if XXH_VECTOR == XXH_NEON
-
-/*
- * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3
- * optimizes out the entire hashLong loop because of the aliasing violation.
- *
- * However, GCC is also inefficient at load-store optimization with vld1q/vst1q,
- * so the only option is to mark it as aliasing.
- */
-typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
-
-/*!
- * @internal
- * @brief `vld1q_u64` but faster and alignment-safe.
- *
- * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
- * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
- *
- * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
- * prohibits load-store optimizations. Therefore, a direct dereference is used.
- *
- * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe
- * unaligned load.
- */
-#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
-XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
-{
-    return *(xxh_aliasing_uint64x2_t const *)ptr;
-}
-#else
-XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
-{
-    return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
-}
-#endif
-
-/*!
- * @internal
- * @brief `vmlal_u32` on low and high halves of a vector.
- *
- * This is a workaround for AArch64 GCC < 11 which implemented arm_neon.h with
- * inline assembly and was therefore incapable of merging the `vget_{low, high}_u32`
- * with `vmlal_u32`.
- */
-#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
-XXH_FORCE_INLINE uint64x2_t
-XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
-{
-    /* Inline assembly is the only way */
-    __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
-    return acc;
-}
-XXH_FORCE_INLINE uint64x2_t
-XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
-{
-    /* This intrinsic works as expected */
-    return vmlal_high_u32(acc, lhs, rhs);
-}
-#else
-/* Portable intrinsic versions */
-XXH_FORCE_INLINE uint64x2_t
-XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
-{
-    return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));
-}
-/*! @copydoc XXH_vmlal_low_u32
- * Assume the compiler converts this to vmlal_high_u32 on aarch64 */
-XXH_FORCE_INLINE uint64x2_t
-XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
-{
-    return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));
-}
-#endif
-
-/*!
- * @ingroup tuning
- * @brief Controls the NEON to scalar ratio for XXH3
- *
- * This can be set to 2, 4, 6, or 8.
- *
- * ARM Cortex CPUs are _very_ sensitive to how their pipelines are used.
- *
- * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those
- * can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU
- * bandwidth.
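- *
- * (E.g., with the 6:2 split chosen below, the 6 NEON lanes keep both NEON
- * pipes busy for about 3 cycles while the 2 scalar lanes run on the otherwise
- * idle integer pipe; illustrative arithmetic, not a measurement.)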
- *
- * This is even more noticeable on the more advanced cores like the Cortex-A76 which
- * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
- *
- * Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes
- * and 2 scalar lanes, which is chosen by default.
- *
- * This does not apply to Apple processors or 32-bit processors, which run better with
- * full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes.
- *
- * This change benefits CPUs with large micro-op buffers without negatively affecting
- * most other CPUs:
- *
- *  | Chipset               | Dispatch type       | NEON only | 6:2 hybrid | Diff. |
- *  |:----------------------|:--------------------|----------:|-----------:|------:|
- *  | Snapdragon 730 (A76)  | 2 NEON/8 micro-ops  |  8.8 GB/s |  10.1 GB/s |  ~16% |
- *  | Snapdragon 835 (A73)  | 2 NEON/3 micro-ops  |  5.1 GB/s |   5.3 GB/s |   ~5% |
- *  | Marvell PXA1928 (A53) | In-order dual-issue |  1.9 GB/s |   1.9 GB/s |    0% |
- *  | Apple M1              | 4 NEON/8 micro-ops  | 37.3 GB/s |  36.1 GB/s |  ~-3% |
- *
- * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
- *
- * When using WASM SIMD128, if this is 2 or 6, SIMDe will scalarize 2 of the lanes, meaning
- * it effectively becomes a worse 4.
- *
- * @see XXH3_accumulate_512_neon()
- */
-# ifndef XXH3_NEON_LANES
-#  if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
-   && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
-#   define XXH3_NEON_LANES 6
-#  else
-#   define XXH3_NEON_LANES XXH_ACC_NB
-#  endif
-# endif
-#endif /* XXH_VECTOR == XXH_NEON */
-
-/*
- * VSX and Z Vector helpers.
- *
- * This is very messy, and any pull requests to clean this up are welcome.
- *
- * There are a lot of problems with supporting VSX and s390x, due to
- * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
- */
-#if XXH_VECTOR == XXH_VSX
-/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
- * and `pixel`. This is a problem for obvious reasons.
- *
- * These keywords are unnecessary; the spec literally says they are
- * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
- * after including the header.
- *
- * We use pragma push_macro/pop_macro to keep the namespace clean. */
-# pragma push_macro("bool")
-# pragma push_macro("vector")
-# pragma push_macro("pixel")
-/* silence potential macro redefined warnings */
-# undef bool
-# undef vector
-# undef pixel
-
-# if defined(__s390x__)
-# include <s390intrin.h>
-# else
-# include <altivec.h>
-# endif
-
-/* Restore the original macro values, if applicable. */
-# pragma pop_macro("pixel")
-# pragma pop_macro("vector")
-# pragma pop_macro("bool")
-
-typedef __vector unsigned long long xxh_u64x2;
-typedef __vector unsigned char xxh_u8x16;
-typedef __vector unsigned xxh_u32x4;
-
-/*
- * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
- */
-typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
-
-# ifndef XXH_VSX_BE
-#  if defined(__BIG_ENDIAN__) \
-  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-#    define XXH_VSX_BE 1
-#  elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
-#    warning "-maltivec=be is not recommended. Please use native endianness."
-#    define XXH_VSX_BE 1
-#  else
-#    define XXH_VSX_BE 0
-#  endif
-# endif /* !defined(XXH_VSX_BE) */
-
-# if XXH_VSX_BE
-#  if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
-#    define XXH_vec_revb vec_revb
-#  else
-/*!
- * A polyfill for POWER9's vec_revb().
- */
-XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
-{
-    xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
-                                  0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
-    return vec_perm(val, val, vByteSwap);
-}
-#  endif
-# endif /* XXH_VSX_BE */
-
-/*!
- * Performs an unaligned vector load and byte swaps it on big endian.
- */
-XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
-{
-    xxh_u64x2 ret;
-    XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
-# if XXH_VSX_BE
-    ret = XXH_vec_revb(ret);
-# endif
-    return ret;
-}
-
-/*
- * vec_mulo and vec_mule are very problematic intrinsics on PowerPC.
- *
- * These intrinsics weren't added until GCC 8, despite existing for a while,
- * and they are endian dependent. Also, their meaning swaps depending on the version.
- */
-# if defined(__s390x__)
-  /* s390x is always big endian, no issue on this platform */
-#  define XXH_vec_mulo vec_mulo
-#  define XXH_vec_mule vec_mule
-# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
-/* Clang has a better way to control this: we can just use the builtin, which doesn't swap. */
-  /* The IBM XL Compiler (which defines __clang__) only implements the vec_* operations */
-#  define XXH_vec_mulo __builtin_altivec_vmulouw
-#  define XXH_vec_mule __builtin_altivec_vmuleuw
-# else
-/* gcc needs inline assembly */
-/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
-XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
-{
-    xxh_u64x2 result;
-    __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
-    return result;
-}
-XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
-{
-    xxh_u64x2 result;
-    __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
-    return result;
-}
-# endif /* XXH_vec_mulo, XXH_vec_mule */
-#endif /* XXH_VECTOR == XXH_VSX */
-
-#if XXH_VECTOR == XXH_SVE
-#define ACCRND(acc, offset) \
-do { \
-    svuint64_t input_vec = svld1_u64(mask, xinput + offset); \
-    svuint64_t secret_vec = svld1_u64(mask, xsecret + offset); \
-    svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \
-    svuint64_t swapped = svtbl_u64(input_vec, kSwap); \
-    svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \
-    svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \
-    svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
-    acc = svadd_u64_x(mask, acc, mul); \
-} while (0)
-#endif /* XXH_VECTOR == XXH_SVE */
-
-/* prefetch
- * can be disabled by declaring the XXH_NO_PREFETCH build macro */
-#if defined(XXH_NO_PREFETCH)
-#  define XXH_PREFETCH(ptr)  (void)(ptr)  /* disabled */
-#else
-#  if XXH_SIZE_OPT >= 1
-#    define XXH_PREFETCH(ptr) (void)(ptr)
-#  elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
-#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
-#    define XXH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
-#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
-#    define XXH_PREFETCH(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
-#  else
-#    define XXH_PREFETCH(ptr) (void)(ptr)  /* disabled */
-#  endif
-#endif  /* XXH_NO_PREFETCH */
-
-
-/* ==========================================
- * XXH3 default settings
- * ========================================== */
-
-#define XXH_SECRET_DEFAULT_SIZE 192   /* minimum XXH3_SECRET_SIZE_MIN */
-
-#if (XXH_SECRET_DEFAULT_SIZE <
XXH3_SECRET_SIZE_MIN) -# error "default keyset is not large enough" -#endif - -/*! Pseudorandom secret taken directly from FARSH. */ -XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = { - 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, - 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, - 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, - 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, - 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, - 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, - 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, - 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, - 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, - 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, - 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, - 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, -}; - -static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL; /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */ -static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL; /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */ - -#ifdef XXH_OLD_NAMES -# define kSecret XXH3_kSecret -#endif - -#ifdef XXH_DOXYGEN -/*! - * @brief Calculates a 32-bit to 64-bit long multiply. - * - * Implemented as a macro. - * - * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't - * need to (but it shouldn't need to anyways, it is about 7 instructions to do - * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we - * use that instead of the normal method. - * - * If you are compiling for platforms like Thumb-1 and don't have a better option, - * you may also want to write your own long multiply routine here. - * - * @param x, y Numbers to be multiplied - * @return 64-bit product of the low 32 bits of @p x and @p y. - */ -XXH_FORCE_INLINE xxh_u64 -XXH_mult32to64(xxh_u64 x, xxh_u64 y) -{ - return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF); -} -#elif defined(_MSC_VER) && defined(_M_IX86) -# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y)) -#else -/* - * Downcast + upcast is usually better than masking on older compilers like - * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers. - * - * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands - * and perform a full 64x64 multiply -- entirely redundant on 32-bit. - */ -# define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y)) -#endif - -/*! - * @brief Calculates a 64->128-bit long multiply. - * - * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar - * version. - * - * @param lhs , rhs The 64-bit integers to be multiplied - * @return The 128-bit result represented in an @ref XXH128_hash_t. - */ -static XXH128_hash_t -XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) -{ - /* - * GCC/Clang __uint128_t method. - * - * On most 64-bit targets, GCC and Clang define a __uint128_t type. 
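Before the per-platform branches, a self-contained cross-check of the portable grade-school path (defined further down in this function) against the compiler's native 128-bit type; a sketch assuming a 64-bit GCC/Clang host where __uint128_t exists, with mul64to128_ref as a local name:

#include <assert.h>
#include <stdint.h>

/* Grade-school 64x64->128 multiply, mirroring the portable branch below. */
static void mul64to128_ref(uint64_t lhs, uint64_t rhs, uint64_t* lo, uint64_t* hi)
{
    uint64_t const lo_lo = (lhs & 0xFFFFFFFF) * (rhs & 0xFFFFFFFF);
    uint64_t const hi_lo = (lhs >> 32)        * (rhs & 0xFFFFFFFF);
    uint64_t const lo_hi = (lhs & 0xFFFFFFFF) * (rhs >> 32);
    uint64_t const hi_hi = (lhs >> 32)        * (rhs >> 32);
    uint64_t const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
    *hi = (hi_lo >> 32) + (cross >> 32) + hi_hi;
    *lo = (cross << 32) | (lo_lo & 0xFFFFFFFF);
}

int main(void)
{
    uint64_t lo, hi;
    uint64_t const a = 0xDEADBEEFCAFEBABEULL, b = 0x0123456789ABCDEFULL;
    mul64to128_ref(a, b, &lo, &hi);
    __uint128_t const p = (__uint128_t)a * b;   /* native reference */
    assert(lo == (uint64_t)p && hi == (uint64_t)(p >> 64));
    return 0;
}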
- * This is usually the best way as it usually uses a native long 64-bit - * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64. - * - * Usually. - * - * Despite being a 32-bit platform, Clang (and emscripten) define this type - * despite not having the arithmetic for it. This results in a laggy - * compiler builtin call which calculates a full 128-bit multiply. - * In that case it is best to use the portable one. - * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 - */ -#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \ - && defined(__SIZEOF_INT128__) \ - || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - - __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs; - XXH128_hash_t r128; - r128.low64 = (xxh_u64)(product); - r128.high64 = (xxh_u64)(product >> 64); - return r128; - - /* - * MSVC for x64's _umul128 method. - * - * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct); - * - * This compiles to single operand MUL on x64. - */ -#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC) - -#ifndef _MSC_VER -# pragma intrinsic(_umul128) -#endif - xxh_u64 product_high; - xxh_u64 const product_low = _umul128(lhs, rhs, &product_high); - XXH128_hash_t r128; - r128.low64 = product_low; - r128.high64 = product_high; - return r128; - - /* - * MSVC for ARM64's __umulh method. - * - * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method. - */ -#elif defined(_M_ARM64) || defined(_M_ARM64EC) - -#ifndef _MSC_VER -# pragma intrinsic(__umulh) -#endif - XXH128_hash_t r128; - r128.low64 = lhs * rhs; - r128.high64 = __umulh(lhs, rhs); - return r128; - -#else - /* - * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. - * - * This is a fast and simple grade school multiply, which is shown below - * with base 10 arithmetic instead of base 0x100000000. - * - * 9 3 // D2 lhs = 93 - * x 7 5 // D2 rhs = 75 - * ---------- - * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15 - * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45 - * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21 - * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63 - * --------- - * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27 - * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67 - * --------- - * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975 - * - * The reasons for adding the products like this are: - * 1. It avoids manual carry tracking. Just like how - * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX. - * This avoids a lot of complexity. - * - * 2. It hints for, and on Clang, compiles to, the powerful UMAAL - * instruction available in ARM's Digital Signal Processing extension - * in 32-bit ARMv6 and later, which is shown below: - * - * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm) - * { - * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm; - * *RdLo = (xxh_u32)(product & 0xFFFFFFFF); - * *RdHi = (xxh_u32)(product >> 32); - * } - * - * This instruction was designed for efficient long multiplication, and - * allows this to be calculated in only 4 instructions at speeds - * comparable to some 64-bit ALUs. - * - * 3. It isn't terrible on other platforms. Usually this will be a couple - * of 32-bit ADD/ADCs. - */ - - /* First calculate all of the cross products. 
*/ - xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); - xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); - xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); - xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32); - - /* Now add the products together. These will never overflow. */ - xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; - xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; - xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); - - XXH128_hash_t r128; - r128.low64 = lower; - r128.high64 = upper; - return r128; -#endif -} - -/*! - * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it. - * - * The reason for the separate function is to prevent passing too many structs - * around by value. This will hopefully inline the multiply, but we don't force it. - * - * @param lhs , rhs The 64-bit integers to multiply - * @return The low 64 bits of the product XOR'd by the high 64 bits. - * @see XXH_mult64to128() - */ -static xxh_u64 -XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) -{ - XXH128_hash_t product = XXH_mult64to128(lhs, rhs); - return product.low64 ^ product.high64; -} - -/*! Seems to produce slightly better code on GCC for some reason. */ -XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift) -{ - XXH_ASSERT(0 <= shift && shift < 64); - return v64 ^ (v64 >> shift); -} - -/* - * This is a fast avalanche stage, - * suitable when input bits are already partially mixed - */ -static XXH64_hash_t XXH3_avalanche(xxh_u64 h64) -{ - h64 = XXH_xorshift64(h64, 37); - h64 *= PRIME_MX1; - h64 = XXH_xorshift64(h64, 32); - return h64; -} - -/* - * This is a stronger avalanche, - * inspired by Pelle Evensen's rrmxmx - * preferable when input has not been previously mixed - */ -static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len) -{ - /* this mix is inspired by Pelle Evensen's rrmxmx */ - h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24); - h64 *= PRIME_MX2; - h64 ^= (h64 >> 35) + len ; - h64 *= PRIME_MX2; - return XXH_xorshift64(h64, 28); -} - - -/* ========================================== - * Short keys - * ========================================== - * One of the shortcomings of XXH32 and XXH64 was that their performance was - * sub-optimal on short lengths. It used an iterative algorithm which strongly - * favored lengths that were a multiple of 4 or 8. - * - * Instead of iterating over individual inputs, we use a set of single shot - * functions which piece together a range of lengths and operate in constant time. - * - * Additionally, the number of multiplies has been significantly reduced. This - * reduces latency, especially when emulating 64-bit multiplies on 32-bit. - * - * Depending on the platform, this may or may not be faster than XXH32, but it - * is almost guaranteed to be faster than XXH64. - */ - -/* - * At very short lengths, there isn't enough input to fully hide secrets, or use - * the entire secret. - * - * There is also only a limited amount of mixing we can do before significantly - * impacting performance. - * - * Therefore, we use different sections of the secret and always mix two secret - * samples with an XOR. This should have no effect on performance on the - * seedless or withSeed variants because everything _should_ be constant folded - * by modern compilers. - * - * The XOR mixing hides individual parts of the secret and increases entropy. - * - * This adds an extra layer of strength for custom secrets. 
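The 1-3 byte packing documented inside the next function can also be sketched standalone (illustrative helper mirroring the comment in XXH3_len_1to3_64b; pack_1to3 is a local name):

#include <stddef.h>
#include <stdint.h>

/* Packs 1-3 input bytes plus the length into a single 32-bit word:
 * bits 16-23 = first byte, 24-31 = middle byte, 0-7 = last byte, 8-15 = len. */
static uint32_t pack_1to3(const uint8_t* input, size_t len)
{
    uint8_t const c1 = input[0];
    uint8_t const c2 = input[len >> 1];  /* middle byte (== c1 when len == 1) */
    uint8_t const c3 = input[len - 1];
    return ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24)
         | ((uint32_t)c3 <<  0) | ((uint32_t)len <<  8);
}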
- */ -XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(1 <= len && len <= 3); - XXH_ASSERT(secret != NULL); - /* - * len = 1: combined = { input[0], 0x01, input[0], input[0] } - * len = 2: combined = { input[1], 0x02, input[0], input[1] } - * len = 3: combined = { input[2], 0x03, input[0], input[1] } - */ - { xxh_u8 const c1 = input[0]; - xxh_u8 const c2 = input[len >> 1]; - xxh_u8 const c3 = input[len - 1]; - xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24) - | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); - xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; - xxh_u64 const keyed = (xxh_u64)combined ^ bitflip; - return XXH64_avalanche(keyed); - } -} - -XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(secret != NULL); - XXH_ASSERT(4 <= len && len <= 8); - seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; - { xxh_u32 const input1 = XXH_readLE32(input); - xxh_u32 const input2 = XXH_readLE32(input + len - 4); - xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed; - xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32); - xxh_u64 const keyed = input64 ^ bitflip; - return XXH3_rrmxmx(keyed, len); - } -} - -XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(secret != NULL); - XXH_ASSERT(9 <= len && len <= 16); - { xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed; - xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed; - xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1; - xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2; - xxh_u64 const acc = len - + XXH_swap64(input_lo) + input_hi - + XXH3_mul128_fold64(input_lo, input_hi); - return XXH3_avalanche(acc); - } -} - -XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(len <= 16); - { if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed); - if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed); - if (len) return XXH3_len_1to3_64b(input, len, secret, seed); - return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64))); - } -} - -/* - * DISCLAIMER: There are known *seed-dependent* multicollisions here due to - * multiplication by zero, affecting hashes of lengths 17 to 240. - * - * However, they are very unlikely. - * - * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all - * unseeded non-cryptographic hashes, it does not attempt to defend itself - * against specially crafted inputs, only random inputs. - * - * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes - * cancelling out the secret is taken an arbitrary number of times (addressed - * in XXH3_accumulate_512), this collision is very unlikely with random inputs - * and/or proper seeding: - * - * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a - * function that is only called up to 16 times per hash with up to 240 bytes of - * input. 
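A minimal usage sketch that exercises each short-length kernel through the public API (assumes the single-header build via XXH_INLINE_ALL with xxhash.h on the include path; the input literals are arbitrary):

#include <stdio.h>
#define XXH_INLINE_ALL
#include "xxhash.h"

int main(void)
{
    /* Length tiers 1-3, 4-8, and 9-16 each dispatch to a different kernel above. */
    printf("%016llx\n", (unsigned long long)XXH3_64bits("ab", 2));
    printf("%016llx\n", (unsigned long long)XXH3_64bits("abcdef", 6));
    printf("%016llx\n", (unsigned long long)XXH3_64bits("0123456789AB", 12));
    return 0;
}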
- * - * This is not too bad for a non-cryptographic hash function, especially with - * only 64 bit outputs. - * - * The 128-bit variant (which trades some speed for strength) is NOT affected - * by this, although it is always a good idea to use a proper seed if you care - * about strength. - */ -XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input, - const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64) -{ -#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ - && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \ - && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */ - /* - * UGLY HACK: - * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in - * slower code. - * - * By forcing seed64 into a register, we disrupt the cost model and - * cause it to scalarize. See `XXH32_round()` - * - * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600, - * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on - * GCC 9.2, despite both emitting scalar code. - * - * GCC generates much better scalar code than Clang for the rest of XXH3, - * which is why finding a more optimal codepath is an interest. - */ - XXH_COMPILER_GUARD(seed64); -#endif - { xxh_u64 const input_lo = XXH_readLE64(input); - xxh_u64 const input_hi = XXH_readLE64(input+8); - return XXH3_mul128_fold64( - input_lo ^ (XXH_readLE64(secret) + seed64), - input_hi ^ (XXH_readLE64(secret+8) - seed64) - ); - } -} - -/* For mid range keys, XXH3 uses a Mum-hash variant. */ -XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH64_hash_t seed) -{ - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; - XXH_ASSERT(16 < len && len <= 128); - - { xxh_u64 acc = len * XXH_PRIME64_1; -#if XXH_SIZE_OPT >= 1 - /* Smaller and cleaner, but slightly slower. 
 */
-        unsigned int i = (unsigned int)(len - 1) / 32;
-        do {
-            acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
-            acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
-        } while (i-- != 0);
-#else
-        if (len > 32) {
-            if (len > 64) {
-                if (len > 96) {
-                    acc += XXH3_mix16B(input+48, secret+96, seed);
-                    acc += XXH3_mix16B(input+len-64, secret+112, seed);
-                }
-                acc += XXH3_mix16B(input+32, secret+64, seed);
-                acc += XXH3_mix16B(input+len-48, secret+80, seed);
-            }
-            acc += XXH3_mix16B(input+16, secret+32, seed);
-            acc += XXH3_mix16B(input+len-32, secret+48, seed);
-        }
-        acc += XXH3_mix16B(input+0, secret+0, seed);
-        acc += XXH3_mix16B(input+len-16, secret+16, seed);
-#endif
-        return XXH3_avalanche(acc);
-    }
-}
-
-#define XXH3_MIDSIZE_MAX 240
-
-XXH_NO_INLINE XXH_PUREF XXH64_hash_t
-XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
-                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                      XXH64_hash_t seed)
-{
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
-    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
-
-    #define XXH3_MIDSIZE_STARTOFFSET 3
-    #define XXH3_MIDSIZE_LASTOFFSET  17
-
-    {   xxh_u64 acc = len * XXH_PRIME64_1;
-        xxh_u64 acc_end;
-        unsigned int const nbRounds = (unsigned int)len / 16;
-        unsigned int i;
-        XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
-        for (i=0; i<8; i++) {
-            acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
-        }
-        /* last bytes */
-        acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
-        XXH_ASSERT(nbRounds >= 8);
-        acc = XXH3_avalanche(acc);
-#if defined(__clang__)                                /* Clang */ \
-    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
-    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
-        /*
-         * UGLY HACK:
-         * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
-         * Everywhere else, it uses scalar code.
-         *
-         * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
-         * would still be slower than UMAAL (see XXH_mult64to128).
-         *
-         * Unfortunately, Clang doesn't handle the long multiplies properly and
-         * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
-         * scalarized into an ugly mess of VMOV.32 instructions.
-         *
-         * This mess is difficult to avoid without turning autovectorization
-         * off completely, but they are usually relatively minor and/or not
-         * worth it to fix.
-         *
-         * This loop is the easiest to fix, as unlike XXH32, this pragma
-         * _actually works_ because it is a loop vectorization instead of an
-         * SLP vectorization.
-         */
-        #pragma clang loop vectorize(disable)
-#endif
-        for (i=8 ; i < nbRounds; i++) {
-            /*
-             * Prevents Clang from unrolling the acc loop and interleaving with this one.
-             */
-            XXH_COMPILER_GUARD(acc);
-            acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
-        }
-        return XXH3_avalanche(acc + acc_end);
-    }
-}
-
-
-/* =======     Long Keys     ======= */
-
-#define XXH_STRIPE_LEN 64
-#define XXH_SECRET_CONSUME_RATE 8   /* nb of secret bytes consumed at each accumulation */
-#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
-
-#ifdef XXH_OLD_NAMES
-#  define STRIPE_LEN XXH_STRIPE_LEN
-#  define ACC_NB XXH_ACC_NB
-#endif
-
-#ifndef XXH_PREFETCH_DIST
-#  ifdef __clang__
-#    define XXH_PREFETCH_DIST 320
-#  else
-#    if (XXH_VECTOR == XXH_AVX512)
-#      define XXH_PREFETCH_DIST 512
-#    else
-#      define XXH_PREFETCH_DIST 384
-#    endif
-#  endif  /* __clang__ */
-#endif  /* XXH_PREFETCH_DIST */
-
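With the constants above fixed, the number of stripes one secret supports per block follows directly; this mirrors the nbStripesPerBlock expression in XXH3_hashLong_internal_loop near the end of this section (stripes_per_block is a local illustrative name):

#include <stddef.h>

/* Each stripe slides the secret window by 8 bytes, and the final stripe
 * still needs a full 64 bytes of secret, hence (secretSize - 64) / 8.
 * The 192-byte default secret yields (192 - 64) / 8 = 16 stripes per block. */
static size_t stripes_per_block(size_t secretSize)
{
    return (secretSize - 64 /* XXH_STRIPE_LEN */) / 8 /* XXH_SECRET_CONSUME_RATE */;
}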
-/*
- * These macros are used to generate an XXH3_accumulate() function.
- * The two arguments select the name suffix and target attribute.
- *
- * The name of this symbol is XXH3_accumulate_<name>() and it calls
- * XXH3_accumulate_512_<name>().
- *
- * It may be useful to hand implement this function if the compiler fails to
- * optimize the inline function.
- */
-#define XXH3_ACCUMULATE_TEMPLATE(name)                      \
-void                                                        \
-XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc,           \
-                       const xxh_u8* XXH_RESTRICT input,    \
-                       const xxh_u8* XXH_RESTRICT secret,   \
-                       size_t nbStripes)                    \
-{                                                           \
-    size_t n;                                               \
-    for (n = 0; n < nbStripes; n++ ) {                      \
-        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;  \
-        XXH_PREFETCH(in + XXH_PREFETCH_DIST);               \
-        XXH3_accumulate_512_##name(                         \
-                 acc,                                       \
-                 in,                                        \
-                 secret + n*XXH_SECRET_CONSUME_RATE);       \
-    }                                                       \
-}
-
-
-XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
-{
-    if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
-    XXH_memcpy(dst, &v64, sizeof(v64));
-}
-
-/* Several intrinsic functions below are supposed to accept __int64 as argument,
- * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
- * However, several environments do not define the __int64 type,
- * requiring a workaround.
- */
-#if !defined (__VMS) \
-  && (defined (__cplusplus) \
-  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-    typedef int64_t xxh_i64;
-#else
-    /* the following type must have a width of 64-bit */
-    typedef long long xxh_i64;
-#endif
-
-
-/*
- * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
- *
- * It is a hardened version of UMAC, based off of FARSH's implementation.
- *
- * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
- * implementations, and it is ridiculously fast.
- *
- * We harden it by mixing the original input to the accumulators as well as the product.
- *
- * This means that in the (relatively likely) case of a multiply by zero, the
- * original input is preserved.
- *
- * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
- * cross-pollination, as otherwise the upper and lower halves would be
- * essentially independent.
- *
- * This doesn't matter on 64-bit hashes since they all get merged together in
- * the end, so we skip the extra step.
- *
- * Both XXH3_64bits and XXH3_128bits use this subroutine.
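A per-lane scalar model of the step just described; a sketch only (the real scalar code additionally reads both values little-endian, see XXH3_scalarRound later in this file):

#include <stddef.h>
#include <stdint.h>

/* One lane of accumulate_512: the raw input is added to the *paired* lane
 * (the 64-bit swap), while the multiply folds the low and high halves of
 * input ^ secret, so a zero product never erases the input. */
static void accumulate_lane(uint64_t acc[8], const uint64_t input[8],
                            const uint64_t secret[8], size_t lane)
{
    uint64_t const data_key = input[lane] ^ secret[lane];
    acc[lane ^ 1] += input[lane];
    acc[lane]     += (data_key & 0xFFFFFFFF) * (data_key >> 32);
}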
- */ - -#if (XXH_VECTOR == XXH_AVX512) \ - || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0) - -#ifndef XXH_TARGET_AVX512 -# define XXH_TARGET_AVX512 /* disable attribute target */ -#endif - -XXH_FORCE_INLINE XXH_TARGET_AVX512 void -XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - __m512i* const xacc = (__m512i *) acc; - XXH_ASSERT((((size_t)acc) & 63) == 0); - XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); - - { - /* data_vec = input[0]; */ - __m512i const data_vec = _mm512_loadu_si512 (input); - /* key_vec = secret[0]; */ - __m512i const key_vec = _mm512_loadu_si512 (secret); - /* data_key = data_vec ^ key_vec; */ - __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec); - /* data_key_lo = data_key >> 32; */ - __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32); - /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ - __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo); - /* xacc[0] += swap(data_vec); */ - __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2)); - __m512i const sum = _mm512_add_epi64(*xacc, data_swap); - /* xacc[0] += product; */ - *xacc = _mm512_add_epi64(product, sum); - } -} -XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512) - -/* - * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing. - * - * Multiplication isn't perfect, as explained by Google in HighwayHash: - * - * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to - * // varying degrees. In descending order of goodness, bytes - * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32. - * // As expected, the upper and lower bytes are much worse. - * - * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291 - * - * Since our algorithm uses a pseudorandom secret to add some variance into the - * mix, we don't need to (or want to) mix as often or as much as HighwayHash does. - * - * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid - * extraction. - * - * Both XXH3_64bits and XXH3_128bits use this subroutine. 
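Every SIMD scramble below is the vector form of a short scalar recipe; a sketch (it mirrors XXH3_scalarScrambleRound defined near the end of this file, with the prime written out):

#include <stdint.h>

/* One 64-bit lane of the scramble: xorshift by 47, mix in the secret,
 * then multiply by the 32-bit prime. The SIMD paths split the multiply
 * into two 32x32->64 products because vector 64x64 multiplies are rare. */
static uint64_t scramble_lane(uint64_t acc, uint64_t secret)
{
    acc ^= acc >> 47;
    acc ^= secret;
    acc *= 0x9E3779B1ULL;   /* XXH_PRIME32_1, zero-extended to 64 bits */
    return acc;
}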
- */ - -XXH_FORCE_INLINE XXH_TARGET_AVX512 void -XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 63) == 0); - XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); - { __m512i* const xacc = (__m512i*) acc; - const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1); - - /* xacc[0] ^= (xacc[0] >> 47) */ - __m512i const acc_vec = *xacc; - __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47); - /* xacc[0] ^= secret; */ - __m512i const key_vec = _mm512_loadu_si512 (secret); - __m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */); - - /* xacc[0] *= XXH_PRIME32_1; */ - __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32); - __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32); - __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32); - *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32)); - } -} - -XXH_FORCE_INLINE XXH_TARGET_AVX512 void -XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64) -{ - XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0); - XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64); - XXH_ASSERT(((size_t)customSecret & 63) == 0); - (void)(&XXH_writeLE64); - { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i); - __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64); - __m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos); - - const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret); - __m512i* const dest = ( __m512i*) customSecret; - int i; - XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */ - XXH_ASSERT(((size_t)dest & 63) == 0); - for (i=0; i < nbRounds; ++i) { - dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed); - } } -} - -#endif - -#if (XXH_VECTOR == XXH_AVX2) \ - || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0) - -#ifndef XXH_TARGET_AVX2 -# define XXH_TARGET_AVX2 /* disable attribute target */ -#endif - -XXH_FORCE_INLINE XXH_TARGET_AVX2 void -XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 31) == 0); - { __m256i* const xacc = (__m256i *) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ - const __m256i* const xinput = (const __m256i *) input; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. 
*/ - const __m256i* const xsecret = (const __m256i *) secret; - - size_t i; - for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { - /* data_vec = xinput[i]; */ - __m256i const data_vec = _mm256_loadu_si256 (xinput+i); - /* key_vec = xsecret[i]; */ - __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); - /* data_key = data_vec ^ key_vec; */ - __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); - /* data_key_lo = data_key >> 32; */ - __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32); - /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ - __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo); - /* xacc[i] += swap(data_vec); */ - __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2)); - __m256i const sum = _mm256_add_epi64(xacc[i], data_swap); - /* xacc[i] += product; */ - xacc[i] = _mm256_add_epi64(product, sum); - } } -} -XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2) - -XXH_FORCE_INLINE XXH_TARGET_AVX2 void -XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 31) == 0); - { __m256i* const xacc = (__m256i*) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ - const __m256i* const xsecret = (const __m256i *) secret; - const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1); - - size_t i; - for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { - /* xacc[i] ^= (xacc[i] >> 47) */ - __m256i const acc_vec = xacc[i]; - __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47); - __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted); - /* xacc[i] ^= xsecret; */ - __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); - __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); - - /* xacc[i] *= XXH_PRIME32_1; */ - __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32); - __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32); - __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32); - xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)); - } - } -} - -XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64) -{ - XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0); - XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6); - XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64); - (void)(&XXH_writeLE64); - XXH_PREFETCH(customSecret); - { __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64); - - const __m256i* const src = (const __m256i*) ((const void*) XXH3_kSecret); - __m256i* dest = ( __m256i*) customSecret; - -# if defined(__GNUC__) || defined(__clang__) - /* - * On GCC & Clang, marking 'dest' as modified will cause the compiler: - * - do not extract the secret from sse registers in the internal loop - * - use less common registers, and avoid pushing these reg into stack - */ - XXH_COMPILER_GUARD(dest); -# endif - XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */ - XXH_ASSERT(((size_t)dest & 31) == 0); - - /* GCC -O2 need unroll loop manually */ - dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed); - dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed); - dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed); - dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed); - dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed); - dest[5] 
= _mm256_add_epi64(_mm256_load_si256(src+5), seed); - } -} - -#endif - -/* x86dispatch always generates SSE2 */ -#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH) - -#ifndef XXH_TARGET_SSE2 -# define XXH_TARGET_SSE2 /* disable attribute target */ -#endif - -XXH_FORCE_INLINE XXH_TARGET_SSE2 void -XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - /* SSE2 is just a half-scale version of the AVX2 version. */ - XXH_ASSERT((((size_t)acc) & 15) == 0); - { __m128i* const xacc = (__m128i *) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ - const __m128i* const xinput = (const __m128i *) input; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ - const __m128i* const xsecret = (const __m128i *) secret; - - size_t i; - for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { - /* data_vec = xinput[i]; */ - __m128i const data_vec = _mm_loadu_si128 (xinput+i); - /* key_vec = xsecret[i]; */ - __m128i const key_vec = _mm_loadu_si128 (xsecret+i); - /* data_key = data_vec ^ key_vec; */ - __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); - /* data_key_lo = data_key >> 32; */ - __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); - /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ - __m128i const product = _mm_mul_epu32 (data_key, data_key_lo); - /* xacc[i] += swap(data_vec); */ - __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); - __m128i const sum = _mm_add_epi64(xacc[i], data_swap); - /* xacc[i] += product; */ - xacc[i] = _mm_add_epi64(product, sum); - } } -} -XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2) - -XXH_FORCE_INLINE XXH_TARGET_SSE2 void -XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 15) == 0); - { __m128i* const xacc = (__m128i*) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm_loadu_si128 requires a const __m128i * pointer for some reason. 
*/ - const __m128i* const xsecret = (const __m128i *) secret; - const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1); - - size_t i; - for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { - /* xacc[i] ^= (xacc[i] >> 47) */ - __m128i const acc_vec = xacc[i]; - __m128i const shifted = _mm_srli_epi64 (acc_vec, 47); - __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted); - /* xacc[i] ^= xsecret[i]; */ - __m128i const key_vec = _mm_loadu_si128 (xsecret+i); - __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); - - /* xacc[i] *= XXH_PRIME32_1; */ - __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); - __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32); - __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32); - xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32)); - } - } -} - -XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64) -{ - XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); - (void)(&XXH_writeLE64); - { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i); - -# if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900 - /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */ - XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) }; - __m128i const seed = _mm_load_si128((__m128i const*)seed64x2); -# else - __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64); -# endif - int i; - - const void* const src16 = XXH3_kSecret; - __m128i* dst16 = (__m128i*) customSecret; -# if defined(__GNUC__) || defined(__clang__) - /* - * On GCC & Clang, marking 'dest' as modified will cause the compiler: - * - do not extract the secret from sse registers in the internal loop - * - use less common registers, and avoid pushing these reg into stack - */ - XXH_COMPILER_GUARD(dst16); -# endif - XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */ - XXH_ASSERT(((size_t)dst16 & 15) == 0); - - for (i=0; i < nbRounds; ++i) { - dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed); - } } -} - -#endif - -#if (XXH_VECTOR == XXH_NEON) - -/* forward declarations for the scalar routines */ -XXH_FORCE_INLINE void -XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input, - void const* XXH_RESTRICT secret, size_t lane); - -XXH_FORCE_INLINE void -XXH3_scalarScrambleRound(void* XXH_RESTRICT acc, - void const* XXH_RESTRICT secret, size_t lane); - -/*! - * @internal - * @brief The bulk processing loop for NEON and WASM SIMD128. - * - * The NEON code path is actually partially scalar when running on AArch64. This - * is to optimize the pipelining and can have up to 15% speedup depending on the - * CPU, and it also mitigates some GCC codegen issues. - * - * @see XXH3_NEON_LANES for configuring this and details about this optimization. - * - * NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit - * integers instead of the other platforms which mask full 64-bit vectors, - * so the setup is more complicated than just shifting right. - * - * Additionally, there is an optimization for 4 lanes at once noted below. - * - * Since, as stated, the most optimal amount of lanes for Cortexes is 6, - * there needs to be *three* versions of the accumulate operation used - * for the remaining 2 lanes. - * - * WASM's SIMD128 uses SIMDe's arm_neon.h polyfill because the intrinsics overlap - * nearly perfectly. 
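Since the note above defers tuning to XXH3_NEON_LANES, a consumer-side sketch of overriding it (assumes the single-header inline build; 8 forces full-NEON operation, which is already the default on Apple cores):

/* Define before the header: forces all 8 lanes onto NEON, e.g. when
 * benchmarking shows the 6:2 hybrid split is not profitable on a core. */
#define XXH3_NEON_LANES 8
#define XXH_INLINE_ALL
#include "xxhash.h"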
- */ - -XXH_FORCE_INLINE void -XXH3_accumulate_512_neon( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 15) == 0); - XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0); - { /* GCC for darwin arm64 does not like aliasing here */ - xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc; - /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */ - uint8_t const* xinput = (const uint8_t *) input; - uint8_t const* xsecret = (const uint8_t *) secret; - - size_t i; -#ifdef __wasm_simd128__ - /* - * On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret - * is constant propagated, which results in it converting it to this - * inside the loop: - * - * a = v128.load(XXH3_kSecret + 0 + $secret_offset, offset = 0) - * b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0) - * ... - * - * This requires a full 32-bit address immediate (and therefore a 6 byte - * instruction) as well as an add for each offset. - * - * Putting an asm guard prevents it from folding (at the cost of losing - * the alignment hint), and uses the free offset in `v128.load` instead - * of adding secret_offset each time which overall reduces code size by - * about a kilobyte and improves performance. - */ - XXH_COMPILER_GUARD(xsecret); -#endif - /* Scalar lanes use the normal scalarRound routine */ - for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) { - XXH3_scalarRound(acc, input, secret, i); - } - i = 0; - /* 4 NEON lanes at a time. */ - for (; i+1 < XXH3_NEON_LANES / 2; i+=2) { - /* data_vec = xinput[i]; */ - uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16)); - uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i+1) * 16)); - /* key_vec = xsecret[i]; */ - uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (i * 16)); - uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((i+1) * 16)); - /* data_swap = swap(data_vec) */ - uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1); - uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1); - /* data_key = data_vec ^ key_vec; */ - uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1); - uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2); - - /* - * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a - * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to - * get one vector with the low 32 bits of each lane, and one vector - * with the high 32 bits of each lane. - * - * The intrinsic returns a double vector because the original ARMv7-a - * instruction modified both arguments in place. AArch64 and SIMD128 emit - * two instructions from this intrinsic. - * - * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ] - * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ] - */ - uint32x4x2_t unzipped = vuzpq_u32( - vreinterpretq_u32_u64(data_key_1), - vreinterpretq_u32_u64(data_key_2) - ); - /* data_key_lo = data_key & 0xFFFFFFFF */ - uint32x4_t data_key_lo = unzipped.val[0]; - /* data_key_hi = data_key >> 32 */ - uint32x4_t data_key_hi = unzipped.val[1]; - /* - * Then, we can split the vectors horizontally and multiply which, as for most - * widening intrinsics, have a variant that works on both high half vectors - * for free on AArch64. A similar instruction is available on SIMD128. 
- * - * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi - */ - uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi); - uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi); - /* - * Clang reorders - * a += b * c; // umlal swap.2d, dkl.2s, dkh.2s - * c += a; // add acc.2d, acc.2d, swap.2d - * to - * c += a; // add acc.2d, acc.2d, swap.2d - * c += b * c; // umlal acc.2d, dkl.2s, dkh.2s - * - * While it would make sense in theory since the addition is faster, - * for reasons likely related to umlal being limited to certain NEON - * pipelines, this is worse. A compiler guard fixes this. - */ - XXH_COMPILER_GUARD_CLANG_NEON(sum_1); - XXH_COMPILER_GUARD_CLANG_NEON(sum_2); - /* xacc[i] = acc_vec + sum; */ - xacc[i] = vaddq_u64(xacc[i], sum_1); - xacc[i+1] = vaddq_u64(xacc[i+1], sum_2); - } - /* Operate on the remaining NEON lanes 2 at a time. */ - for (; i < XXH3_NEON_LANES / 2; i++) { - /* data_vec = xinput[i]; */ - uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16)); - /* key_vec = xsecret[i]; */ - uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); - /* acc_vec_2 = swap(data_vec) */ - uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1); - /* data_key = data_vec ^ key_vec; */ - uint64x2_t data_key = veorq_u64(data_vec, key_vec); - /* For two lanes, just use VMOVN and VSHRN. */ - /* data_key_lo = data_key & 0xFFFFFFFF; */ - uint32x2_t data_key_lo = vmovn_u64(data_key); - /* data_key_hi = data_key >> 32; */ - uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32); - /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */ - uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi); - /* Same Clang workaround as before */ - XXH_COMPILER_GUARD_CLANG_NEON(sum); - /* xacc[i] = acc_vec + sum; */ - xacc[i] = vaddq_u64 (xacc[i], sum); - } - } -} -XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon) - -XXH_FORCE_INLINE void -XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 15) == 0); - - { xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc; - uint8_t const* xsecret = (uint8_t const*) secret; - - size_t i; - /* WASM uses operator overloads and doesn't need these. 
*/ -#ifndef __wasm_simd128__ - /* { prime32_1, prime32_1 } */ - uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1); - /* { 0, prime32_1, 0, prime32_1 } */ - uint32x4_t const kPrimeHi = vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64)XXH_PRIME32_1 << 32)); -#endif - - /* AArch64 uses both scalar and neon at the same time */ - for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) { - XXH3_scalarScrambleRound(acc, secret, i); - } - for (i=0; i < XXH3_NEON_LANES / 2; i++) { - /* xacc[i] ^= (xacc[i] >> 47); */ - uint64x2_t acc_vec = xacc[i]; - uint64x2_t shifted = vshrq_n_u64(acc_vec, 47); - uint64x2_t data_vec = veorq_u64(acc_vec, shifted); - - /* xacc[i] ^= xsecret[i]; */ - uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); - uint64x2_t data_key = veorq_u64(data_vec, key_vec); - /* xacc[i] *= XXH_PRIME32_1 */ -#ifdef __wasm_simd128__ - /* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */ - xacc[i] = data_key * XXH_PRIME32_1; -#else - /* - * Expanded version with portable NEON intrinsics - * - * lo(x) * lo(y) + (hi(x) * lo(y) << 32) - * - * prod_hi = hi(data_key) * lo(prime) << 32 - * - * Since we only need 32 bits of this multiply a trick can be used, reinterpreting the vector - * as a uint32x4_t and multiplying by { 0, prime, 0, prime } to cancel out the unwanted bits - * and avoid the shift. - */ - uint32x4_t prod_hi = vmulq_u32 (vreinterpretq_u32_u64(data_key), kPrimeHi); - /* Extract low bits for vmlal_u32 */ - uint32x2_t data_key_lo = vmovn_u64(data_key); - /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */ - xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo); -#endif - } - } -} -#endif - -#if (XXH_VECTOR == XXH_VSX) - -XXH_FORCE_INLINE void -XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - /* presumed aligned */ - xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc; - xxh_u8 const* const xinput = (xxh_u8 const*) input; /* no alignment restriction */ - xxh_u8 const* const xsecret = (xxh_u8 const*) secret; /* no alignment restriction */ - xxh_u64x2 const v32 = { 32, 32 }; - size_t i; - for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { - /* data_vec = xinput[i]; */ - xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i); - /* key_vec = xsecret[i]; */ - xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i); - xxh_u64x2 const data_key = data_vec ^ key_vec; - /* shuffled = (data_key << 32) | (data_key >> 32); */ - xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32); - /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */ - xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled); - /* acc_vec = xacc[i]; */ - xxh_u64x2 acc_vec = xacc[i]; - acc_vec += product; - - /* swap high and low halves */ -#ifdef __s390x__ - acc_vec += vec_permi(data_vec, data_vec, 2); -#else - acc_vec += vec_xxpermdi(data_vec, data_vec, 2); -#endif - xacc[i] = acc_vec; - } -} -XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx) - -XXH_FORCE_INLINE void -XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 15) == 0); - - { xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc; - const xxh_u8* const xsecret = (const xxh_u8*) secret; - /* constants */ - xxh_u64x2 const v32 = { 32, 32 }; - xxh_u64x2 const v47 = { 47, 47 }; - xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 }; - size_t i; - for (i = 0; i < XXH_STRIPE_LEN / 
sizeof(xxh_u64x2); i++) { - /* xacc[i] ^= (xacc[i] >> 47); */ - xxh_u64x2 const acc_vec = xacc[i]; - xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47); - - /* xacc[i] ^= xsecret[i]; */ - xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i); - xxh_u64x2 const data_key = data_vec ^ key_vec; - - /* xacc[i] *= XXH_PRIME32_1 */ - /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */ - xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime); - /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */ - xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime); - xacc[i] = prod_odd + (prod_even << v32); - } } -} - -#endif - -#if (XXH_VECTOR == XXH_SVE) - -XXH_FORCE_INLINE void -XXH3_accumulate_512_sve( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - uint64_t *xacc = (uint64_t *)acc; - const uint64_t *xinput = (const uint64_t *)(const void *)input; - const uint64_t *xsecret = (const uint64_t *)(const void *)secret; - svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); - uint64_t element_count = svcntd(); - if (element_count >= 8) { - svbool_t mask = svptrue_pat_b64(SV_VL8); - svuint64_t vacc = svld1_u64(mask, xacc); - ACCRND(vacc, 0); - svst1_u64(mask, xacc, vacc); - } else if (element_count == 2) { /* sve128 */ - svbool_t mask = svptrue_pat_b64(SV_VL2); - svuint64_t acc0 = svld1_u64(mask, xacc + 0); - svuint64_t acc1 = svld1_u64(mask, xacc + 2); - svuint64_t acc2 = svld1_u64(mask, xacc + 4); - svuint64_t acc3 = svld1_u64(mask, xacc + 6); - ACCRND(acc0, 0); - ACCRND(acc1, 2); - ACCRND(acc2, 4); - ACCRND(acc3, 6); - svst1_u64(mask, xacc + 0, acc0); - svst1_u64(mask, xacc + 2, acc1); - svst1_u64(mask, xacc + 4, acc2); - svst1_u64(mask, xacc + 6, acc3); - } else { - svbool_t mask = svptrue_pat_b64(SV_VL4); - svuint64_t acc0 = svld1_u64(mask, xacc + 0); - svuint64_t acc1 = svld1_u64(mask, xacc + 4); - ACCRND(acc0, 0); - ACCRND(acc1, 4); - svst1_u64(mask, xacc + 0, acc0); - svst1_u64(mask, xacc + 4, acc1); - } -} - -XXH_FORCE_INLINE void -XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc, - const xxh_u8* XXH_RESTRICT input, - const xxh_u8* XXH_RESTRICT secret, - size_t nbStripes) -{ - if (nbStripes != 0) { - uint64_t *xacc = (uint64_t *)acc; - const uint64_t *xinput = (const uint64_t *)(const void *)input; - const uint64_t *xsecret = (const uint64_t *)(const void *)secret; - svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); - uint64_t element_count = svcntd(); - if (element_count >= 8) { - svbool_t mask = svptrue_pat_b64(SV_VL8); - svuint64_t vacc = svld1_u64(mask, xacc + 0); - do { - /* svprfd(svbool_t, void *, enum svfprop); */ - svprfd(mask, xinput + 128, SV_PLDL1STRM); - ACCRND(vacc, 0); - xinput += 8; - xsecret += 1; - nbStripes--; - } while (nbStripes != 0); - - svst1_u64(mask, xacc + 0, vacc); - } else if (element_count == 2) { /* sve128 */ - svbool_t mask = svptrue_pat_b64(SV_VL2); - svuint64_t acc0 = svld1_u64(mask, xacc + 0); - svuint64_t acc1 = svld1_u64(mask, xacc + 2); - svuint64_t acc2 = svld1_u64(mask, xacc + 4); - svuint64_t acc3 = svld1_u64(mask, xacc + 6); - do { - svprfd(mask, xinput + 128, SV_PLDL1STRM); - ACCRND(acc0, 0); - ACCRND(acc1, 2); - ACCRND(acc2, 4); - ACCRND(acc3, 6); - xinput += 8; - xsecret += 1; - nbStripes--; - } while (nbStripes != 0); - - svst1_u64(mask, xacc + 0, acc0); - svst1_u64(mask, xacc + 2, acc1); - svst1_u64(mask, xacc + 4, acc2); - svst1_u64(mask, xacc + 6, acc3); - } else { - svbool_t mask = 
svptrue_pat_b64(SV_VL4); - svuint64_t acc0 = svld1_u64(mask, xacc + 0); - svuint64_t acc1 = svld1_u64(mask, xacc + 4); - do { - svprfd(mask, xinput + 128, SV_PLDL1STRM); - ACCRND(acc0, 0); - ACCRND(acc1, 4); - xinput += 8; - xsecret += 1; - nbStripes--; - } while (nbStripes != 0); - - svst1_u64(mask, xacc + 0, acc0); - svst1_u64(mask, xacc + 4, acc1); - } - } -} - -#endif - -/* scalar variants - universal */ - -#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__)) -/* - * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they - * emit an excess mask and a full 64-bit multiply-add (MADD X-form). - * - * While this might not seem like much, as AArch64 is a 64-bit architecture, only - * big Cortex designs have a full 64-bit multiplier. - * - * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit - * multiplies expand to 2-3 multiplies in microcode. This has a major penalty - * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline. - * - * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does - * not have this penalty and does the mask automatically. - */ -XXH_FORCE_INLINE xxh_u64 -XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc) -{ - xxh_u64 ret; - /* note: %x = 64-bit register, %w = 32-bit register */ - __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc)); - return ret; -} -#else -XXH_FORCE_INLINE xxh_u64 -XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc) -{ - return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc; -} -#endif - -/*! - * @internal - * @brief Scalar round for @ref XXH3_accumulate_512_scalar(). - * - * This is extracted to its own function because the NEON path uses a combination - * of NEON and scalar. - */ -XXH_FORCE_INLINE void -XXH3_scalarRound(void* XXH_RESTRICT acc, - void const* XXH_RESTRICT input, - void const* XXH_RESTRICT secret, - size_t lane) -{ - xxh_u64* xacc = (xxh_u64*) acc; - xxh_u8 const* xinput = (xxh_u8 const*) input; - xxh_u8 const* xsecret = (xxh_u8 const*) secret; - XXH_ASSERT(lane < XXH_ACC_NB); - XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0); - { - xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8); - xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8); - xacc[lane ^ 1] += data_val; /* swap adjacent lanes */ - xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]); - } -} - -/*! - * @internal - * @brief Processes a 64 byte block of data using the scalar path. - */ -XXH_FORCE_INLINE void -XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - size_t i; - /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */ -#if defined(__GNUC__) && !defined(__clang__) \ - && (defined(__arm__) || defined(__thumb2__)) \ - && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \ - && XXH_SIZE_OPT <= 0 -# pragma GCC unroll 8 -#endif - for (i=0; i < XXH_ACC_NB; i++) { - XXH3_scalarRound(acc, input, secret, i); - } -} -XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar) - -/*! - * @internal - * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar(). - * - * This is extracted to its own function because the NEON path uses a combination - * of NEON and scalar. 
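Looping back to XXH_mult32to64_add64 above: whichever branch compiles, it must agree with this portable reference (illustrative check; mult32to64_add64_ref is a local name):

#include <assert.h>
#include <stdint.h>

/* lo32(lhs) * lo32(rhs) + acc, exactly what a single UMADDL computes. */
static uint64_t mult32to64_add64_ref(uint64_t lhs, uint64_t rhs, uint64_t acc)
{
    return (uint64_t)(uint32_t)lhs * (uint64_t)(uint32_t)rhs + acc;
}

int main(void)
{
    assert(mult32to64_add64_ref(5, 7, 100) == 135);
    /* high input bits must be ignored: lo32(0x100000003) == 3 */
    assert(mult32to64_add64_ref(0x100000003ULL, 2, 10) == 16);
    return 0;
}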
- */
-XXH_FORCE_INLINE void
-XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
-                         void const* XXH_RESTRICT secret,
-                         size_t lane)
-{
-    xxh_u64* const xacc = (xxh_u64*) acc;   /* presumed aligned */
-    const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */
-    XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
-    XXH_ASSERT(lane < XXH_ACC_NB);
-    {
-        xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
-        xxh_u64 acc64 = xacc[lane];
-        acc64 = XXH_xorshift64(acc64, 47);
-        acc64 ^= key64;
-        acc64 *= XXH_PRIME32_1;
-        xacc[lane] = acc64;
-    }
-}
-
-/*!
- * @internal
- * @brief Scrambles the accumulators after a large chunk has been read
- */
-XXH_FORCE_INLINE void
-XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    size_t i;
-    for (i=0; i < XXH_ACC_NB; i++) {
-        XXH3_scalarScrambleRound(acc, secret, i);
-    }
-}
-
-XXH_FORCE_INLINE void
-XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
-{
-    /*
-     * We need a separate pointer for the hack below,
-     * which requires a non-const pointer.
-     * Any decent compiler will optimize this out otherwise.
-     */
-    const xxh_u8* kSecretPtr = XXH3_kSecret;
-    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
-
-#if defined(__GNUC__) && defined(__aarch64__)
-    /*
-     * UGLY HACK:
-     * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
-     * placed sequentially, in order, at the top of the unrolled loop.
-     *
-     * While MOVK is great for generating constants (2 cycles for a 64-bit
-     * constant compared to 4 cycles for LDR), it fights for bandwidth with
-     * the arithmetic instructions.
-     *
-     *   I   L   S
-     * MOVK
-     * MOVK
-     * MOVK
-     * MOVK
-     * ADD
-     * SUB      STR
-     *          STR
-     *
-     * By forcing loads from memory (as the asm line causes the compiler to assume
-     * that XXH3_kSecretPtr has been changed), the pipelines are used more
-     * efficiently:
-     *
-     *   I   L   S
-     *      LDR
-     *  ADD LDR
-     *  SUB     STR
-     *          STR
-     *
-     * See XXH3_NEON_LANES for details on the pipeline.
-     *
-     * XXH3_64bits_withSeed, len == 256, Snapdragon 835
-     *   without hack: 2654.4 MB/s
-     *   with hack:    3202.9 MB/s
-     */
-    XXH_COMPILER_GUARD(kSecretPtr);
-#endif
-    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
-        int i;
-        for (i=0; i < nbRounds; i++) {
-            /*
-             * The asm hack causes the compiler to assume that kSecretPtr aliases with
-             * customSecret, and on aarch64, this prevented LDP from merging two
-             * loads together for free. Putting the loads together before the stores
-             * properly generates LDP.
- */ - xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64; - xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64; - XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo); - XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi); - } } -} - - -typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t); -typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*); -typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64); - - -#if (XXH_VECTOR == XXH_AVX512) - -#define XXH3_accumulate_512 XXH3_accumulate_512_avx512 -#define XXH3_accumulate XXH3_accumulate_avx512 -#define XXH3_scrambleAcc XXH3_scrambleAcc_avx512 -#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512 - -#elif (XXH_VECTOR == XXH_AVX2) - -#define XXH3_accumulate_512 XXH3_accumulate_512_avx2 -#define XXH3_accumulate XXH3_accumulate_avx2 -#define XXH3_scrambleAcc XXH3_scrambleAcc_avx2 -#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2 - -#elif (XXH_VECTOR == XXH_SSE2) - -#define XXH3_accumulate_512 XXH3_accumulate_512_sse2 -#define XXH3_accumulate XXH3_accumulate_sse2 -#define XXH3_scrambleAcc XXH3_scrambleAcc_sse2 -#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2 - -#elif (XXH_VECTOR == XXH_NEON) - -#define XXH3_accumulate_512 XXH3_accumulate_512_neon -#define XXH3_accumulate XXH3_accumulate_neon -#define XXH3_scrambleAcc XXH3_scrambleAcc_neon -#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar - -#elif (XXH_VECTOR == XXH_VSX) - -#define XXH3_accumulate_512 XXH3_accumulate_512_vsx -#define XXH3_accumulate XXH3_accumulate_vsx -#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx -#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar - -#elif (XXH_VECTOR == XXH_SVE) -#define XXH3_accumulate_512 XXH3_accumulate_512_sve -#define XXH3_accumulate XXH3_accumulate_sve -#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar -#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar - -#else /* scalar */ - -#define XXH3_accumulate_512 XXH3_accumulate_512_scalar -#define XXH3_accumulate XXH3_accumulate_scalar -#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar -#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar - -#endif - -#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */ -# undef XXH3_initCustomSecret -# define XXH3_initCustomSecret XXH3_initCustomSecret_scalar -#endif - -XXH_FORCE_INLINE void -XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc, - const xxh_u8* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble) -{ - size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; - size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock; - size_t const nb_blocks = (len - 1) / block_len; - - size_t n; - - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); - - for (n = 0; n < nb_blocks; n++) { - f_acc(acc, input + n*block_len, secret, nbStripesPerBlock); - f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN); - } - - /* last partial block */ - XXH_ASSERT(len > XXH_STRIPE_LEN); - { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN; - XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE)); - f_acc(acc, input + nb_blocks*block_len, secret, nbStripes); - - /* last stripe */ - { const xxh_u8* const p = input + len - XXH_STRIPE_LEN; -#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */ - XXH3_accumulate_512(acc, p, 
                                secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
-    }   }
-}
-
-XXH_FORCE_INLINE xxh_u64
-XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
-{
-    return XXH3_mul128_fold64(
-               acc[0] ^ XXH_readLE64(secret),
-               acc[1] ^ XXH_readLE64(secret+8) );
-}
-
-static XXH64_hash_t
-XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
-{
-    xxh_u64 result64 = start;
-    size_t i = 0;
-
-    for (i = 0; i < 4; i++) {
-        result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
-#if defined(__clang__)                                /* Clang */ \
-    && (defined(__arm__) || defined(__thumb__))       /* ARMv7 */ \
-    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */  \
-    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
-        /*
-         * UGLY HACK:
-         * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
-         * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
-         * XXH3_64bits, len == 256, Snapdragon 835:
-         *   without hack: 2063.7 MB/s
-         *   with hack:    2560.7 MB/s
-         */
-        XXH_COMPILER_GUARD(result64);
-#endif
-    }
-
-    return XXH3_avalanche(result64);
-}
-
-#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
-                        XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
-
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
-                           const void* XXH_RESTRICT secret, size_t secretSize,
-                           XXH3_f_accumulate f_acc,
-                           XXH3_f_scrambleAcc f_scramble)
-{
-    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
-
-    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
-
-    /* converge into final hash */
-    XXH_STATIC_ASSERT(sizeof(acc) == 64);
-    /* do not align on 8, so that the secret is different from the accumulator */
-#define XXH_SECRET_MERGEACCS_START 11
-    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
-    return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
-}
-
-/*
- * It's important for performance to transmit secret's size (when it's static)
- * so that the compiler can properly optimize the vectorized loop.
- * This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
- * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
- * breaks -Og, this is XXH_NO_INLINE.
- */
-XXH3_WITH_SECRET_INLINE XXH64_hash_t
-XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
-                             XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
-{
-    (void)seed64;
-    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
-}
-
-/*
- * It's preferable for performance that XXH3_hashLong is not inlined,
- * as it results in a smaller function for small data, easier on the instruction cache.
- * Note that inside this no_inline function, we do inline the internal loop,
- * and provide a statically defined secret size to allow optimization of vector loop.
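XXH3_mix2Accs above leans on XXH3_mul128_fold64, which widens two 64-bit lanes into a 128-bit product and XORs the halves back together. A sketch of that fold using the GCC/Clang unsigned __int128 extension (illustrative; the real header also carries portable and MSVC paths):

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t mul128_fold64(uint64_t lhs, uint64_t rhs)
    {
        __uint128_t product = (__uint128_t)lhs * rhs;
        /* Fold the high half into the low half so both influence the result. */
        return (uint64_t)product ^ (uint64_t)(product >> 64);
    }

    int main(void)
    {
        printf("%016llx\n", (unsigned long long)mul128_fold64(~0ULL, 2));
        return 0;
    }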
- */ -XXH_NO_INLINE XXH_PUREF XXH64_hash_t -XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) -{ - (void)seed64; (void)secret; (void)secretLen; - return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc); -} - -/* - * XXH3_hashLong_64b_withSeed(): - * Generate a custom key based on alteration of default XXH3_kSecret with the seed, - * and then use this key for long mode hashing. - * - * This operation is decently fast but nonetheless costs a little bit of time. - * Try to avoid it whenever possible (typically when seed==0). - * - * It's important for performance that XXH3_hashLong is not inlined. Not sure - * why (uop cache maybe?), but the difference is large and easily measurable. - */ -XXH_FORCE_INLINE XXH64_hash_t -XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len, - XXH64_hash_t seed, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble, - XXH3_f_initCustomSecret f_initSec) -{ -#if XXH_SIZE_OPT <= 0 - if (seed == 0) - return XXH3_hashLong_64b_internal(input, len, - XXH3_kSecret, sizeof(XXH3_kSecret), - f_acc, f_scramble); -#endif - { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; - f_initSec(secret, seed); - return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret), - f_acc, f_scramble); - } -} - -/* - * It's important for performance that XXH3_hashLong is not inlined. - */ -XXH_NO_INLINE XXH64_hash_t -XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) -{ - (void)secret; (void)secretLen; - return XXH3_hashLong_64b_withSeed_internal(input, len, seed, - XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret); -} - - -typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t, - XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t); - -XXH_FORCE_INLINE XXH64_hash_t -XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, - XXH3_hashLong64_f f_hashLong) -{ - XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); - /* - * If an action is to be taken if `secretLen` condition is not respected, - * it should be done here. - * For now, it's a contract pre-condition. - * Adding a check and a branch here would cost performance at every hash. - * Also, note that function signature doesn't offer room to return an error. - */ - if (len <= 16) - return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); - if (len <= 128) - return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); - if (len <= XXH3_MIDSIZE_MAX) - return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); - return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen); -} - - -/* === Public entry point === */ - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length) -{ - return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH64_hash_t -XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize) -{ - return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret); -} - -/*! 
@ingroup XXH3_family */ -XXH_PUBLIC_API XXH64_hash_t -XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed) -{ - return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed); -} - -XXH_PUBLIC_API XXH64_hash_t -XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed) -{ - if (length <= XXH3_MIDSIZE_MAX) - return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL); - return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize); -} - - -/* === XXH3 streaming === */ -#ifndef XXH_NO_STREAM -/* - * Malloc's a pointer that is always aligned to align. - * - * This must be freed with `XXH_alignedFree()`. - * - * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte - * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2 - * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON. - * - * This underalignment previously caused a rather obvious crash which went - * completely unnoticed due to XXH3_createState() not actually being tested. - * Credit to RedSpah for noticing this bug. - * - * The alignment is done manually: Functions like posix_memalign or _mm_malloc - * are avoided: To maintain portability, we would have to write a fallback - * like this anyways, and besides, testing for the existence of library - * functions without relying on external build tools is impossible. - * - * The method is simple: Overallocate, manually align, and store the offset - * to the original behind the returned pointer. - * - * Align must be a power of 2 and 8 <= align <= 128. - */ -static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align) -{ - XXH_ASSERT(align <= 128 && align >= 8); /* range check */ - XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */ - XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */ - { /* Overallocate to make room for manual realignment and an offset byte */ - xxh_u8* base = (xxh_u8*)XXH_malloc(s + align); - if (base != NULL) { - /* - * Get the offset needed to align this pointer. - * - * Even if the returned pointer is aligned, there will always be - * at least one byte to store the offset to the original pointer. - */ - size_t offset = align - ((size_t)base & (align - 1)); /* base % align */ - /* Add the offset for the now-aligned pointer */ - xxh_u8* ptr = base + offset; - - XXH_ASSERT((size_t)ptr % align == 0); - - /* Store the offset immediately before the returned pointer. */ - ptr[-1] = (xxh_u8)offset; - return ptr; - } - return NULL; - } -} -/* - * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass - * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout. - */ -static void XXH_alignedFree(void* p) -{ - if (p != NULL) { - xxh_u8* ptr = (xxh_u8*)p; - /* Get the offset byte we added in XXH_malloc. */ - xxh_u8 offset = ptr[-1]; - /* Free the original malloc'd pointer */ - xxh_u8* base = ptr - offset; - XXH_free(base); - } -} -/*! @ingroup XXH3_family */ -/*! - * @brief Allocate an @ref XXH3_state_t. - * - * Must be freed with XXH3_freeState(). - * @return An allocated XXH3_state_t on success, `NULL` on failure. - */ -XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void) -{ - XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64); - if (state==NULL) return NULL; - XXH3_INITSTATE(state); - return state; -} - -/*! 
@ingroup XXH3_family */ -/*! - * @brief Frees an @ref XXH3_state_t. - * - * Must be allocated with XXH3_createState(). - * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState(). - * @return XXH_OK. - */ -XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr) -{ - XXH_alignedFree(statePtr); - return XXH_OK; -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API void -XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state) -{ - XXH_memcpy(dst_state, src_state, sizeof(*dst_state)); -} - -static void -XXH3_reset_internal(XXH3_state_t* statePtr, - XXH64_hash_t seed, - const void* secret, size_t secretSize) -{ - size_t const initStart = offsetof(XXH3_state_t, bufferedSize); - size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart; - XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart); - XXH_ASSERT(statePtr != NULL); - /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */ - memset((char*)statePtr + initStart, 0, initLength); - statePtr->acc[0] = XXH_PRIME32_3; - statePtr->acc[1] = XXH_PRIME64_1; - statePtr->acc[2] = XXH_PRIME64_2; - statePtr->acc[3] = XXH_PRIME64_3; - statePtr->acc[4] = XXH_PRIME64_4; - statePtr->acc[5] = XXH_PRIME32_2; - statePtr->acc[6] = XXH_PRIME64_5; - statePtr->acc[7] = XXH_PRIME32_1; - statePtr->seed = seed; - statePtr->useSeed = (seed != 0); - statePtr->extSecret = (const unsigned char*)secret; - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); - statePtr->secretLimit = secretSize - XXH_STRIPE_LEN; - statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE; -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr) -{ - if (statePtr == NULL) return XXH_ERROR; - XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); - return XXH_OK; -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize) -{ - if (statePtr == NULL) return XXH_ERROR; - XXH3_reset_internal(statePtr, 0, secret, secretSize); - if (secret == NULL) return XXH_ERROR; - if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; - return XXH_OK; -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed) -{ - if (statePtr == NULL) return XXH_ERROR; - if (seed==0) return XXH3_64bits_reset(statePtr); - if ((seed != statePtr->seed) || (statePtr->extSecret != NULL)) - XXH3_initCustomSecret(statePtr->customSecret, seed); - XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); - return XXH_OK; -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64) -{ - if (statePtr == NULL) return XXH_ERROR; - if (secret == NULL) return XXH_ERROR; - if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; - XXH3_reset_internal(statePtr, seed64, secret, secretSize); - statePtr->useSeed = 1; /* always, even if seed64==0 */ - return XXH_OK; -} - -/*! - * @internal - * @brief Processes a large input for XXH3_update() and XXH3_digest_long(). - * - * Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block. 
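Taken together, the allocation and reset plumbing above supports the usual streaming pattern. A minimal usage sketch (assumes the public xxhash.h header is on the include path; error handling omitted for brevity):

    #include <stdio.h>
    #include <string.h>
    #include "xxhash.h"

    int main(void)
    {
        XXH3_state_t* st = XXH3_createState();   /* 64-byte aligned via XXH_alignedMalloc */
        XXH3_64bits_reset_withSeed(st, 42);
        XXH3_64bits_update(st, "split ", 6);
        XXH3_64bits_update(st, "input", 5);      /* same digest as hashing "split input" at once */
        printf("%016llx\n", (unsigned long long)XXH3_64bits_digest(st));
        XXH3_freeState(st);
        return 0;
    }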
- * - * @param acc Pointer to the 8 accumulator lanes - * @param nbStripesSoFarPtr In/out pointer to the number of leftover stripes in the block* - * @param nbStripesPerBlock Number of stripes in a block - * @param input Input pointer - * @param nbStripes Number of stripes to process - * @param secret Secret pointer - * @param secretLimit Offset of the last block in @p secret - * @param f_acc Pointer to an XXH3_accumulate implementation - * @param f_scramble Pointer to an XXH3_scrambleAcc implementation - * @return Pointer past the end of @p input after processing - */ -XXH_FORCE_INLINE const xxh_u8 * -XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc, - size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock, - const xxh_u8* XXH_RESTRICT input, size_t nbStripes, - const xxh_u8* XXH_RESTRICT secret, size_t secretLimit, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble) -{ - const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE; - /* Process full blocks */ - if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) { - /* Process the initial partial block... */ - size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr; - - do { - /* Accumulate and scramble */ - f_acc(acc, input, initialSecret, nbStripesThisIter); - f_scramble(acc, secret + secretLimit); - input += nbStripesThisIter * XXH_STRIPE_LEN; - nbStripes -= nbStripesThisIter; - /* Then continue the loop with the full block size */ - nbStripesThisIter = nbStripesPerBlock; - initialSecret = secret; - } while (nbStripes >= nbStripesPerBlock); - *nbStripesSoFarPtr = 0; - } - /* Process a partial block */ - if (nbStripes > 0) { - f_acc(acc, input, initialSecret, nbStripes); - input += nbStripes * XXH_STRIPE_LEN; - *nbStripesSoFarPtr += nbStripes; - } - /* Return end pointer */ - return input; -} - -#ifndef XXH3_STREAM_USE_STACK -# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */ -# define XXH3_STREAM_USE_STACK 1 -# endif -#endif -/* - * Both XXH3_64bits_update and XXH3_128bits_update use this routine. - */ -XXH_FORCE_INLINE XXH_errorcode -XXH3_update(XXH3_state_t* XXH_RESTRICT const state, - const xxh_u8* XXH_RESTRICT input, size_t len, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble) -{ - if (input==NULL) { - XXH_ASSERT(len == 0); - return XXH_OK; - } - - XXH_ASSERT(state != NULL); - { const xxh_u8* const bEnd = input + len; - const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; -#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1 - /* For some reason, gcc and MSVC seem to suffer greatly - * when operating accumulators directly into state. - * Operating into stack space seems to enable proper optimization. 
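The stripe bookkeeping XXH3_consumeStripes implements can be followed with plain arithmetic: stripes consume the secret 8 bytes at a time, and whenever a block of nbStripesPerBlock stripes completes, the accumulators are scrambled and the secret offset wraps to zero. A toy trace of just that counter logic, with made-up numbers (not the vendored code):

    #include <stddef.h>
    #include <stdio.h>

    int main(void)
    {
        size_t const nbStripesPerBlock = 16;  /* (192 - 64) / 8 with default constants */
        size_t nbStripesSoFar = 10;           /* leftover stripes from earlier updates */
        size_t nbStripes = 40;                /* stripes arriving in this update */
        while (nbStripes >= nbStripesPerBlock - nbStripesSoFar) {
            nbStripes -= nbStripesPerBlock - nbStripesSoFar;
            nbStripesSoFar = 0;               /* block complete: scramble happens here */
            printf("scramble\n");
        }
        nbStripesSoFar += nbStripes;          /* partial block carried to the next call */
        printf("carry %u stripes\n", (unsigned)nbStripesSoFar);
        return 0;
    }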
- * clang, on the other hand, doesn't seem to need this trick */ - XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; - XXH_memcpy(acc, state->acc, sizeof(acc)); -#else - xxh_u64* XXH_RESTRICT const acc = state->acc; -#endif - state->totalLen += len; - XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE); - - /* small input : just fill in tmp buffer */ - if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) { - XXH_memcpy(state->buffer + state->bufferedSize, input, len); - state->bufferedSize += (XXH32_hash_t)len; - return XXH_OK; - } - - /* total input is now > XXH3_INTERNALBUFFER_SIZE */ - #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN) - XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */ - - /* - * Internal buffer is partially filled (always, except at beginning) - * Complete it, then consume it. - */ - if (state->bufferedSize) { - size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize; - XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize); - input += loadSize; - XXH3_consumeStripes(acc, - &state->nbStripesSoFar, state->nbStripesPerBlock, - state->buffer, XXH3_INTERNALBUFFER_STRIPES, - secret, state->secretLimit, - f_acc, f_scramble); - state->bufferedSize = 0; - } - XXH_ASSERT(input < bEnd); - if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) { - size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN; - input = XXH3_consumeStripes(acc, - &state->nbStripesSoFar, state->nbStripesPerBlock, - input, nbStripes, - secret, state->secretLimit, - f_acc, f_scramble); - XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN); - - } - /* Some remaining input (always) : buffer it */ - XXH_ASSERT(input < bEnd); - XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE); - XXH_ASSERT(state->bufferedSize == 0); - XXH_memcpy(state->buffer, input, (size_t)(bEnd-input)); - state->bufferedSize = (XXH32_hash_t)(bEnd-input); -#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1 - /* save stack accumulators into state */ - XXH_memcpy(state->acc, acc, sizeof(acc)); -#endif - } - - return XXH_OK; -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len) -{ - return XXH3_update(state, (const xxh_u8*)input, len, - XXH3_accumulate, XXH3_scrambleAcc); -} - - -XXH_FORCE_INLINE void -XXH3_digest_long (XXH64_hash_t* acc, - const XXH3_state_t* state, - const unsigned char* secret) -{ - xxh_u8 lastStripe[XXH_STRIPE_LEN]; - const xxh_u8* lastStripePtr; - - /* - * Digest on a local copy. This way, the state remains unaltered, and it can - * continue ingesting more input afterwards. 
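Because XXH3_digest_long below works on a local copy of the accumulators, digest() is a non-destructive snapshot: an intermediate hash can be read while streaming continues. Usage sketch (assumes xxhash.h; error handling omitted):

    #include <stdio.h>
    #include "xxhash.h"

    int main(void)
    {
        XXH3_state_t* st = XXH3_createState();
        XXH3_64bits_reset(st);
        XXH3_64bits_update(st, "part one ", 9);
        {   XXH64_hash_t partial = XXH3_64bits_digest(st);  /* state left intact */
            XXH3_64bits_update(st, "part two", 8);          /* streaming continues */
            printf("partial=%016llx full=%016llx\n",
                   (unsigned long long)partial,
                   (unsigned long long)XXH3_64bits_digest(st));
        }
        XXH3_freeState(st);
        return 0;
    }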
- */ - XXH_memcpy(acc, state->acc, sizeof(state->acc)); - if (state->bufferedSize >= XXH_STRIPE_LEN) { - /* Consume remaining stripes then point to remaining data in buffer */ - size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN; - size_t nbStripesSoFar = state->nbStripesSoFar; - XXH3_consumeStripes(acc, - &nbStripesSoFar, state->nbStripesPerBlock, - state->buffer, nbStripes, - secret, state->secretLimit, - XXH3_accumulate, XXH3_scrambleAcc); - lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN; - } else { /* bufferedSize < XXH_STRIPE_LEN */ - /* Copy to temp buffer */ - size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize; - XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */ - XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize); - XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize); - lastStripePtr = lastStripe; - } - /* Last stripe */ - XXH3_accumulate_512(acc, - lastStripePtr, - secret + state->secretLimit - XXH_SECRET_LASTACC_START); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state) -{ - const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; - if (state->totalLen > XXH3_MIDSIZE_MAX) { - XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; - XXH3_digest_long(acc, state, secret); - return XXH3_mergeAccs(acc, - secret + XXH_SECRET_MERGEACCS_START, - (xxh_u64)state->totalLen * XXH_PRIME64_1); - } - /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */ - if (state->useSeed) - return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); - return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen), - secret, state->secretLimit + XXH_STRIPE_LEN); -} -#endif /* !XXH_NO_STREAM */ - - -/* ========================================== - * XXH3 128 bits (a.k.a XXH128) - * ========================================== - * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant, - * even without counting the significantly larger output size. - * - * For example, extra steps are taken to avoid the seed-dependent collisions - * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B). - * - * This strength naturally comes at the cost of some speed, especially on short - * lengths. Note that longer hashes are about as fast as the 64-bit version - * due to it using only a slight modification of the 64-bit loop. - * - * XXH128 is also more oriented towards 64-bit machines. It is still extremely - * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64). - */ - -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - /* A doubled version of 1to3_64b with different constants. 
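Before the implementation details, a usage sketch for the 128-bit family (assumes xxhash.h): XXH128() is an alias of XXH3_128bits_withSeed(), and with seed 0 it matches the unseeded variant.

    #include <assert.h>
    #include <string.h>
    #include "xxhash.h"

    int main(void)
    {
        const char* msg = "example";
        XXH128_hash_t a = XXH128(msg, strlen(msg), 0);
        XXH128_hash_t b = XXH3_128bits(msg, strlen(msg));  /* same digest for seed 0 */
        assert(XXH128_isEqual(a, b));
        return 0;
    }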
*/ - XXH_ASSERT(input != NULL); - XXH_ASSERT(1 <= len && len <= 3); - XXH_ASSERT(secret != NULL); - /* - * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } - * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } - * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } - */ - { xxh_u8 const c1 = input[0]; - xxh_u8 const c2 = input[len >> 1]; - xxh_u8 const c3 = input[len - 1]; - xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24) - | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); - xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13); - xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; - xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed; - xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl; - xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph; - XXH128_hash_t h128; - h128.low64 = XXH64_avalanche(keyed_lo); - h128.high64 = XXH64_avalanche(keyed_hi); - return h128; - } -} - -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(secret != NULL); - XXH_ASSERT(4 <= len && len <= 8); - seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; - { xxh_u32 const input_lo = XXH_readLE32(input); - xxh_u32 const input_hi = XXH_readLE32(input + len - 4); - xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32); - xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed; - xxh_u64 const keyed = input_64 ^ bitflip; - - /* Shift len to the left to ensure it is even, this avoids even multiplies. */ - XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2)); - - m128.high64 += (m128.low64 << 1); - m128.low64 ^= (m128.high64 >> 3); - - m128.low64 = XXH_xorshift64(m128.low64, 35); - m128.low64 *= PRIME_MX2; - m128.low64 = XXH_xorshift64(m128.low64, 28); - m128.high64 = XXH3_avalanche(m128.high64); - return m128; - } -} - -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(secret != NULL); - XXH_ASSERT(9 <= len && len <= 16); - { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed; - xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed; - xxh_u64 const input_lo = XXH_readLE64(input); - xxh_u64 input_hi = XXH_readLE64(input + len - 8); - XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1); - /* - * Put len in the middle of m128 to ensure that the length gets mixed to - * both the low and high bits in the 128x64 multiply below. - */ - m128.low64 += (xxh_u64)(len - 1) << 54; - input_hi ^= bitfliph; - /* - * Add the high 32 bits of input_hi to the high 32 bits of m128, then - * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to - * the high 64 bits of m128. - * - * The best approach to this operation is different on 32-bit and 64-bit. - */ - if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */ - /* - * 32-bit optimized version, which is more readable. - * - * On 32-bit, it removes an ADC and delays a dependency between the two - * halves of m128.high64, but it generates an extra mask on 64-bit. - */ - m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2); - } else { - /* - * 64-bit optimized (albeit more confusing) version. 
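A worked instance of the combinedl layout documented in XXH3_len_1to3_128b above, for len = 2 and input {0xAB, 0xCD} (illustrative only):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    int main(void)
    {
        uint8_t const input[2] = { 0xAB, 0xCD };
        size_t const len = 2;
        uint8_t const c1 = input[0], c2 = input[len >> 1], c3 = input[len - 1];
        uint32_t const combinedl = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24)
                                 | ((uint32_t)c3 <<  0) | ((uint32_t)len << 8);
        /* Bytes, low to high: { input[1], 0x02, input[0], input[1] } as documented. */
        assert(combinedl == 0xCDAB02CDu);
        return 0;
    }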
- * - * Uses some properties of addition and multiplication to remove the mask: - * - * Let: - * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF) - * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000) - * c = XXH_PRIME32_2 - * - * a + (b * c) - * Inverse Property: x + y - x == y - * a + (b * (1 + c - 1)) - * Distributive Property: x * (y + z) == (x * y) + (x * z) - * a + (b * 1) + (b * (c - 1)) - * Identity Property: x * 1 == x - * a + b + (b * (c - 1)) - * - * Substitute a, b, and c: - * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) - * - * Since input_hi.hi + input_hi.lo == input_hi, we get this: - * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) - */ - m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1); - } - /* m128 ^= XXH_swap64(m128 >> 64); */ - m128.low64 ^= XXH_swap64(m128.high64); - - { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */ - XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2); - h128.high64 += m128.high64 * XXH_PRIME64_2; - - h128.low64 = XXH3_avalanche(h128.low64); - h128.high64 = XXH3_avalanche(h128.high64); - return h128; - } } -} - -/* - * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN - */ -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(len <= 16); - { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed); - if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed); - if (len) return XXH3_len_1to3_128b(input, len, secret, seed); - { XXH128_hash_t h128; - xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72); - xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88); - h128.low64 = XXH64_avalanche(seed ^ bitflipl); - h128.high64 = XXH64_avalanche( seed ^ bitfliph); - return h128; - } } -} - -/* - * A bit slower than XXH3_mix16B, but handles multiply by zero better. - */ -XXH_FORCE_INLINE XXH128_hash_t -XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2, - const xxh_u8* secret, XXH64_hash_t seed) -{ - acc.low64 += XXH3_mix16B (input_1, secret+0, seed); - acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8); - acc.high64 += XXH3_mix16B (input_2, secret+16, seed); - acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8); - return acc; -} - - -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH64_hash_t seed) -{ - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; - XXH_ASSERT(16 < len && len <= 128); - - { XXH128_hash_t acc; - acc.low64 = len * XXH_PRIME64_1; - acc.high64 = 0; - -#if XXH_SIZE_OPT >= 1 - { - /* Smaller, but slightly slower. 
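The derivation above can be spot-checked numerically; both sides wrap identically modulo 2^64 (illustrative check, not the vendored code):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t const input_hi = 0x123456789ABCDEF0ULL;
        uint64_t const c  = 2246822519ULL;   /* XXH_PRIME32_2 */
        uint64_t const lo = (uint32_t)input_hi;
        uint64_t const hi = input_hi & 0xFFFFFFFF00000000ULL;
        /* hi + lo*c == (hi + lo) + lo*(c - 1) == input_hi + lo*(c - 1) */
        assert(hi + lo * c == input_hi + lo * (c - 1));
        return 0;
    }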
         */
-        unsigned int i = (unsigned int)(len - 1) / 32;
-        do {
-            acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
-        } while (i-- != 0);
-    }
-#else
-    if (len > 32) {
-        if (len > 64) {
-            if (len > 96) {
-                acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
-            }
-            acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
-        }
-        acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
-    }
-    acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
-#endif
-    {   XXH128_hash_t h128;
-        h128.low64  = acc.low64 + acc.high64;
-        h128.high64 = (acc.low64    * XXH_PRIME64_1)
-                    + (acc.high64   * XXH_PRIME64_4)
-                    + ((len - seed) * XXH_PRIME64_2);
-        h128.low64  = XXH3_avalanche(h128.low64);
-        h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
-        return h128;
-    }
-    }
-}
-
-XXH_NO_INLINE XXH_PUREF XXH128_hash_t
-XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
-                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                       XXH64_hash_t seed)
-{
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
-    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
-
-    {   XXH128_hash_t acc;
-        unsigned i;
-        acc.low64 = len * XXH_PRIME64_1;
-        acc.high64 = 0;
-        /*
-         * We set `i` as offset + 32. We do this so that unchanged
-         * `len` can be used as upper bound. This reaches a sweet spot
-         * where both x86 and aarch64 get simple address generation (agen)
-         * and good codegen for the loop.
-         */
-        for (i = 32; i < 160; i += 32) {
-            acc = XXH128_mix32B(acc,
-                                input  + i - 32,
-                                input  + i - 16,
-                                secret + i - 32,
-                                seed);
-        }
-        acc.low64 = XXH3_avalanche(acc.low64);
-        acc.high64 = XXH3_avalanche(acc.high64);
-        /*
-         * NB: `i <= len` will duplicate the last 32-bytes if
-         * len % 32 was zero. This is an unfortunate necessity to keep
-         * the hash result stable.
-         */
-        for (i=160; i <= len; i += 32) {
-            acc = XXH128_mix32B(acc,
-                                input + i - 32,
-                                input + i - 16,
-                                secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
-                                seed);
-        }
-        /* last bytes */
-        acc = XXH128_mix32B(acc,
-                            input + len - 16,
-                            input + len - 32,
-                            secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
-                            (XXH64_hash_t)0 - seed);
-
-        {   XXH128_hash_t h128;
-            h128.low64  = acc.low64 + acc.high64;
-            h128.high64 = (acc.low64    * XXH_PRIME64_1)
-                        + (acc.high64   * XXH_PRIME64_4)
-                        + ((len - seed) * XXH_PRIME64_2);
-            h128.low64  = XXH3_avalanche(h128.low64);
-            h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
-            return h128;
-        }
-    }
-}
-
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
-                            const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                            XXH3_f_accumulate f_acc,
-                            XXH3_f_scrambleAcc f_scramble)
-{
-    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
-
-    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
-
-    /* converge into final hash */
-    XXH_STATIC_ASSERT(sizeof(acc) == 64);
-    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
-    {   XXH128_hash_t h128;
-        h128.low64  = XXH3_mergeAccs(acc,
-                                     secret + XXH_SECRET_MERGEACCS_START,
-                                     (xxh_u64)len * XXH_PRIME64_1);
-        h128.high64 = XXH3_mergeAccs(acc,
-                                     secret + secretSize
-                                            - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
-                                     ~((xxh_u64)len * XXH_PRIME64_2));
-        return h128;
-    }
-}
-
-/*
- * It's important for performance that XXH3_hashLong() is not inlined.
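The two loops in XXH3_len_129to240_128b above partition the input as follows: the first always mixes bytes 0..127, the second walks 32-byte chunks from offset 128 up to len (re-reading the last chunk when len is a multiple of 32), and the final XXH128_mix32B re-reads the last 32 bytes with reversed halves. A trace for a hypothetical len = 200 (illustrative only):

    #include <stdio.h>

    int main(void)
    {
        unsigned const len = 200;  /* any length in (128, 240] */
        unsigned i;
        for (i = 32; i < 160; i += 32)
            printf("loop1: bytes %u..%u\n", i - 32, i - 1);   /* covers 0..127 */
        for (i = 160; i <= len; i += 32)
            printf("loop2: bytes %u..%u\n", i - 32, i - 1);   /* covers 128 onward */
        printf("tail : bytes %u..%u\n", len - 32, len - 1);
        return 0;
    }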
- */ -XXH_NO_INLINE XXH_PUREF XXH128_hash_t -XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, - const void* XXH_RESTRICT secret, size_t secretLen) -{ - (void)seed64; (void)secret; (void)secretLen; - return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), - XXH3_accumulate, XXH3_scrambleAcc); -} - -/* - * It's important for performance to pass @p secretLen (when it's static) - * to the compiler, so that it can properly optimize the vectorized loop. - * - * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE - * breaks -Og, this is XXH_NO_INLINE. - */ -XXH3_WITH_SECRET_INLINE XXH128_hash_t -XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, - const void* XXH_RESTRICT secret, size_t secretLen) -{ - (void)seed64; - return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, - XXH3_accumulate, XXH3_scrambleAcc); -} - -XXH_FORCE_INLINE XXH128_hash_t -XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble, - XXH3_f_initCustomSecret f_initSec) -{ - if (seed64 == 0) - return XXH3_hashLong_128b_internal(input, len, - XXH3_kSecret, sizeof(XXH3_kSecret), - f_acc, f_scramble); - { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; - f_initSec(secret, seed64); - return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret), - f_acc, f_scramble); - } -} - -/* - * It's important for performance that XXH3_hashLong is not inlined. - */ -XXH_NO_INLINE XXH128_hash_t -XXH3_hashLong_128b_withSeed(const void* input, size_t len, - XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen) -{ - (void)secret; (void)secretLen; - return XXH3_hashLong_128b_withSeed_internal(input, len, seed64, - XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret); -} - -typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t, - XXH64_hash_t, const void* XXH_RESTRICT, size_t); - -XXH_FORCE_INLINE XXH128_hash_t -XXH3_128bits_internal(const void* input, size_t len, - XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, - XXH3_hashLong128_f f_hl128) -{ - XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); - /* - * If an action is to be taken if `secret` conditions are not respected, - * it should be done here. - * For now, it's a contract pre-condition. - * Adding a check and a branch here would cost performance at every hash. - */ - if (len <= 16) - return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); - if (len <= 128) - return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); - if (len <= XXH3_MIDSIZE_MAX) - return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); - return f_hl128(input, len, seed64, secret, secretLen); -} - - -/* === Public XXH128 API === */ - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len) -{ - return XXH3_128bits_internal(input, len, 0, - XXH3_kSecret, sizeof(XXH3_kSecret), - XXH3_hashLong_128b_default); -} - -/*! 
@ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t -XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize) -{ - return XXH3_128bits_internal(input, len, 0, - (const xxh_u8*)secret, secretSize, - XXH3_hashLong_128b_withSecret); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t -XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed) -{ - return XXH3_128bits_internal(input, len, seed, - XXH3_kSecret, sizeof(XXH3_kSecret), - XXH3_hashLong_128b_withSeed); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t -XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed) -{ - if (len <= XXH3_MIDSIZE_MAX) - return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL); - return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t -XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed) -{ - return XXH3_128bits_withSeed(input, len, seed); -} - - -/* === XXH3 128-bit streaming === */ -#ifndef XXH_NO_STREAM -/* - * All initialization and update functions are identical to 64-bit streaming variant. - * The only difference is the finalization routine. - */ - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr) -{ - return XXH3_64bits_reset(statePtr); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize) -{ - return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed) -{ - return XXH3_64bits_reset_withSeed(statePtr, seed); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed) -{ - return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len) -{ - return XXH3_64bits_update(state, input, len); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state) -{ - const unsigned char* const secret = (state->extSecret == NULL) ? 
                                        state->customSecret : state->extSecret;
-    if (state->totalLen > XXH3_MIDSIZE_MAX) {
-        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
-        XXH3_digest_long(acc, state, secret);
-        XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
-        {   XXH128_hash_t h128;
-            h128.low64  = XXH3_mergeAccs(acc,
-                                         secret + XXH_SECRET_MERGEACCS_START,
-                                         (xxh_u64)state->totalLen * XXH_PRIME64_1);
-            h128.high64 = XXH3_mergeAccs(acc,
-                                         secret + state->secretLimit + XXH_STRIPE_LEN
-                                                - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
-                                         ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
-            return h128;
-        }
-    }
-    /* len <= XXH3_MIDSIZE_MAX : short code */
-    if (state->seed)
-        return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
-    return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
-                                   secret, state->secretLimit + XXH_STRIPE_LEN);
-}
-#endif /* !XXH_NO_STREAM */
-/* 128-bit utility functions */
-
-#include <string.h>   /* memcmp, memcpy */
-
-/* return : 1 if equal, 0 if different */
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
-{
-    /* note : XXH128_hash_t is compact, it has no padding byte */
-    return !(memcmp(&h1, &h2, sizeof(h1)));
-}
-
-/* This prototype is compatible with stdlib's qsort().
- * @return : >0 if *h128_1  > *h128_2
- *           <0 if *h128_1  < *h128_2
- *           =0 if *h128_1 == *h128_2 */
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
-{
-    XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
-    XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
-    int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
-    /* note : bets that, in most cases, hash values are different */
-    if (hcmp) return hcmp;
-    return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
-}
-
-
-/*====== Canonical representation ======*/
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API void
-XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
-{
-    XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
-    if (XXH_CPU_LITTLE_ENDIAN) {
-        hash.high64 = XXH_swap64(hash.high64);
-        hash.low64  = XXH_swap64(hash.low64);
-    }
-    XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
-    XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH128_hash_t
-XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
-{
-    XXH128_hash_t h;
-    h.high64 = XXH_readBE64(src);
-    h.low64  = XXH_readBE64(src->digest + 8);
-    return h;
-}
-
-
-
-/* ==========================================
- * Secret generators
- * ==========================================
- */
-#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
-
-XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
-{
-    XXH_writeLE64( dst,          XXH_readLE64(dst)          ^ h128.low64 );
-    XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
-}
-
-/*!
@ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize) -{ -#if (XXH_DEBUGLEVEL >= 1) - XXH_ASSERT(secretBuffer != NULL); - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); -#else - /* production mode, assert() are disabled */ - if (secretBuffer == NULL) return XXH_ERROR; - if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; -#endif - - if (customSeedSize == 0) { - customSeed = XXH3_kSecret; - customSeedSize = XXH_SECRET_DEFAULT_SIZE; - } -#if (XXH_DEBUGLEVEL >= 1) - XXH_ASSERT(customSeed != NULL); -#else - if (customSeed == NULL) return XXH_ERROR; -#endif - - /* Fill secretBuffer with a copy of customSeed - repeat as needed */ - { size_t pos = 0; - while (pos < secretSize) { - size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize); - memcpy((char*)secretBuffer + pos, customSeed, toCopy); - pos += toCopy; - } } - - { size_t const nbSeg16 = secretSize / 16; - size_t n; - XXH128_canonical_t scrambler; - XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0)); - for (n=0; n { } }; +template <> +struct WrapBytes { + static inline PyObject* Wrap(const char* data, int64_t length) { + return PyUnicode_FromStringAndSize(data, length); + } +}; + template <> struct WrapBytes { static inline PyObject* Wrap(const char* data, int64_t length) { @@ -147,6 +154,13 @@ struct WrapBytes { } }; +template <> +struct WrapBytes { + static inline PyObject* Wrap(const char* data, int64_t length) { + return PyBytes_FromStringAndSize(data, length); + } +}; + template <> struct WrapBytes { static inline PyObject* Wrap(const char* data, int64_t length) { @@ -189,7 +203,9 @@ static inline bool ListTypeSupported(const DataType& type) { return true; case Type::FIXED_SIZE_LIST: case Type::LIST: - case Type::LARGE_LIST: { + case Type::LARGE_LIST: + case Type::LIST_VIEW: + case Type::LARGE_LIST_VIEW: { const auto& list_type = checked_cast(type); return ListTypeSupported(*list_type.value_type()); } @@ -241,7 +257,8 @@ Status SetBufferBase(PyArrayObject* arr, const std::shared_ptr& buffer) } inline void set_numpy_metadata(int type, const DataType* datatype, PyArray_Descr* out) { - auto metadata = reinterpret_cast(out->c_metadata); + auto metadata = + reinterpret_cast(PyDataType_C_METADATA(out)); if (type == NPY_DATETIME) { if (datatype->id() == Type::TIMESTAMP) { const auto& timestamp_type = checked_cast(*datatype); @@ -262,7 +279,7 @@ Status PyArray_NewFromPool(int nd, npy_intp* dims, PyArray_Descr* descr, MemoryP // // * Track allocations // * Get better performance through custom allocators - int64_t total_size = descr->elsize; + int64_t total_size = PyDataType_ELSIZE(descr); for (int i = 0; i < nd; ++i) { total_size *= dims[i]; } @@ -523,8 +540,9 @@ class PandasWriter { void SetDatetimeUnit(NPY_DATETIMEUNIT unit) { PyAcquireGIL lock; - auto date_dtype = reinterpret_cast( - PyArray_DESCR(reinterpret_cast(block_arr_.obj()))->c_metadata); + auto date_dtype = + reinterpret_cast(PyDataType_C_METADATA( + PyArray_DESCR(reinterpret_cast(block_arr_.obj())))); date_dtype->meta.base = unit; } @@ -606,40 +624,40 @@ inline Status ConvertAsPyObjects(const PandasOptions& options, const ChunkedArra using ArrayType = typename TypeTraits::ArrayType; using Scalar = typename MemoizationTraits::Scalar; - ::arrow::internal::ScalarMemoTable memo_table(options.pool); - std::vector unique_values; - int32_t memo_size = 0; - - auto WrapMemoized = [&](const Scalar& value, PyObject** 
out_values) { - int32_t memo_index; - RETURN_NOT_OK(memo_table.GetOrInsert(value, &memo_index)); - if (memo_index == memo_size) { - // New entry - RETURN_NOT_OK(wrap_func(value, out_values)); - unique_values.push_back(*out_values); - ++memo_size; - } else { - // Duplicate entry - Py_INCREF(unique_values[memo_index]); - *out_values = unique_values[memo_index]; + auto convert_chunks = [&](auto&& wrap_func) -> Status { + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = arrow::internal::checked_cast(*data.chunk(c)); + RETURN_NOT_OK(internal::WriteArrayObjects(arr, wrap_func, out_values)); + out_values += arr.length(); } return Status::OK(); }; - auto WrapUnmemoized = [&](const Scalar& value, PyObject** out_values) { - return wrap_func(value, out_values); - }; - - for (int c = 0; c < data.num_chunks(); c++) { - const auto& arr = arrow::internal::checked_cast(*data.chunk(c)); - if (options.deduplicate_objects) { - RETURN_NOT_OK(internal::WriteArrayObjects(arr, WrapMemoized, out_values)); - } else { - RETURN_NOT_OK(internal::WriteArrayObjects(arr, WrapUnmemoized, out_values)); - } - out_values += arr.length(); + if (options.deduplicate_objects) { + // GH-40316: only allocate a memo table if deduplication is enabled. + ::arrow::internal::ScalarMemoTable memo_table(options.pool); + std::vector unique_values; + int32_t memo_size = 0; + + auto WrapMemoized = [&](const Scalar& value, PyObject** out_values) { + int32_t memo_index; + RETURN_NOT_OK(memo_table.GetOrInsert(value, &memo_index)); + if (memo_index == memo_size) { + // New entry + RETURN_NOT_OK(wrap_func(value, out_values)); + unique_values.push_back(*out_values); + ++memo_size; + } else { + // Duplicate entry + Py_INCREF(unique_values[memo_index]); + *out_values = unique_values[memo_index]; + } + return Status::OK(); + }; + return convert_chunks(std::move(WrapMemoized)); + } else { + return convert_chunks(std::forward(wrap_func)); } - return Status::OK(); } Status ConvertStruct(PandasOptions options, const ChunkedArray& data, @@ -736,9 +754,11 @@ Status DecodeDictionaries(MemoryPool* pool, const std::shared_ptr& den return Status::OK(); } -template -Status ConvertListsLike(PandasOptions options, const ChunkedArray& data, - PyObject** out_values) { +template +enable_if_list_like ConvertListsLike(PandasOptions options, + const ChunkedArray& data, + PyObject** out_values) { + using ListArrayT = typename TypeTraits::ArrayType; // Get column of underlying value arrays ArrayVector value_arrays; for (int c = 0; c < data.num_chunks(); c++) { @@ -812,6 +832,26 @@ Status ConvertListsLike(PandasOptions options, const ChunkedArray& data, return Status::OK(); } +// TODO GH-40579: optimize ListView conversion to avoid unnecessary copies +template +enable_if_list_view ConvertListsLike(PandasOptions options, + const ChunkedArray& data, + PyObject** out_values) { + using ListViewArrayType = typename TypeTraits::ArrayType; + using NonViewType = + std::conditional_t; + using NonViewClass = typename TypeTraits::ArrayType; + ArrayVector list_arrays; + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = checked_cast(*data.chunk(c)); + ARROW_ASSIGN_OR_RAISE(auto non_view_array, + NonViewClass::FromListView(arr, options.pool)); + list_arrays.emplace_back(non_view_array); + } + auto chunked_array = std::make_shared(list_arrays); + return ConvertListsLike(options, *chunked_array, out_values); +} + template Status ConvertMapHelper(F1 resetRow, F2 addPairToRow, F3 stealRow, const ChunkedArray& data, PyArrayObject* py_keys, @@ -1154,7 
+1194,8 @@ struct ObjectWriterVisitor { } template - enable_if_t::value || is_fixed_size_binary_type::value, + enable_if_t::value || is_binary_view_like_type::value || + is_fixed_size_binary_type::value, Status> Visit(const Type& type) { auto WrapValue = [](const std::string_view& view, PyObject** out) { @@ -1327,16 +1368,14 @@ struct ObjectWriterVisitor { } template - enable_if_t::value || is_var_length_list_type::value, - Status> - Visit(const T& type) { - using ArrayType = typename TypeTraits::ArrayType; + enable_if_t::value || is_list_view_type::value, Status> Visit( + const T& type) { if (!ListTypeSupported(*type.value_type())) { return Status::NotImplemented( "Not implemented type for conversion from List to Pandas: ", type.value_type()->ToString()); } - return ConvertListsLike(options, data, out_values); + return ConvertListsLike(options, data, out_values); } Status Visit(const MapType& type) { return ConvertMap(options, data, out_values); } @@ -1350,13 +1389,10 @@ struct ObjectWriterVisitor { std::is_same::value || std::is_same::value || std::is_same::value || - std::is_same::value || - std::is_same::value || std::is_same::value || (std::is_base_of::value && !std::is_same::value) || - std::is_base_of::value || - std::is_base_of::value, + std::is_base_of::value, Status> Visit(const Type& type) { return Status::NotImplemented("No implemented conversion to object dtype: ", @@ -2086,8 +2122,10 @@ static Status GetPandasWriterType(const ChunkedArray& data, const PandasOptions& break; case Type::STRING: // fall through case Type::LARGE_STRING: // fall through + case Type::STRING_VIEW: // fall through case Type::BINARY: // fall through case Type::LARGE_BINARY: + case Type::BINARY_VIEW: case Type::NA: // fall through case Type::FIXED_SIZE_BINARY: // fall through case Type::STRUCT: // fall through @@ -2189,6 +2227,8 @@ static Status GetPandasWriterType(const ChunkedArray& data, const PandasOptions& case Type::FIXED_SIZE_LIST: case Type::LIST: case Type::LARGE_LIST: + case Type::LIST_VIEW: + case Type::LARGE_LIST_VIEW: case Type::MAP: { auto list_type = std::static_pointer_cast(data.type()); if (!ListTypeSupported(*list_type->value_type())) { @@ -2273,6 +2313,14 @@ std::shared_ptr GetStorageChunkedArray(std::shared_ptr(std::move(storage_arrays), value_type); }; +// Helper function to decode RunEndEncodedArray +Result> GetDecodedChunkedArray( + std::shared_ptr arr) { + ARROW_ASSIGN_OR_RAISE(Datum decoded, compute::RunEndDecode(arr)); + DCHECK(decoded.is_chunked_array()); + return decoded.chunked_array(); +}; + class ConsolidatedBlockCreator : public PandasBlockCreator { public: using PandasBlockCreator::PandasBlockCreator; @@ -2302,6 +2350,11 @@ class ConsolidatedBlockCreator : public PandasBlockCreator { if (arrays_[column_index]->type()->id() == Type::EXTENSION) { arrays_[column_index] = GetStorageChunkedArray(arrays_[column_index]); } + // In case of a RunEndEncodedArray default to the values type + else if (arrays_[column_index]->type()->id() == Type::RUN_END_ENCODED) { + ARROW_ASSIGN_OR_RAISE(arrays_[column_index], + GetDecodedChunkedArray(arrays_[column_index])); + } return GetPandasWriterType(*arrays_[column_index], options_, out); } } @@ -2499,6 +2552,8 @@ Status ConvertChunkedArrayToPandas(const PandasOptions& options, std::shared_ptr arr, PyObject* py_ref, PyObject** out) { if (options.decode_dictionaries && arr->type()->id() == Type::DICTIONARY) { + // XXX we should return an error as below if options.zero_copy_only + // is true, but that would break compatibility with existing 
tests. const auto& dense_type = checked_cast(*arr->type()).value_type(); RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &arr)); @@ -2534,6 +2589,18 @@ Status ConvertChunkedArrayToPandas(const PandasOptions& options, if (arr->type()->id() == Type::EXTENSION) { arr = GetStorageChunkedArray(arr); } + // In case of a RunEndEncodedArray decode the array + else if (arr->type()->id() == Type::RUN_END_ENCODED) { + if (options.zero_copy_only) { + return Status::Invalid("Need to dencode a RunEndEncodedArray, but ", + "only zero-copy conversions allowed"); + } + ARROW_ASSIGN_OR_RAISE(arr, GetDecodedChunkedArray(arr)); + + // Because we built a new array when we decoded the RunEndEncodedArray + // the final resulting numpy array should own the memory through a Capsule + py_ref = nullptr; + } PandasWriter::type output_type; RETURN_NOT_OK(GetPandasWriterType(*arr, modified_options, &output_type)); diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/arrow_to_pandas.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/arrow_to_pandas.h old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/arrow_to_pandas.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/arrow_to_pandas.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/arrow_to_python_internal.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/arrow_to_python_internal.h old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/arrow_to_python_internal.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/arrow_to_python_internal.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/async.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/async.h old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/async.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/async.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/benchmark.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/benchmark.cc old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/benchmark.cc rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/benchmark.cc diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/benchmark.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/benchmark.h old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/benchmark.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/benchmark.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/common.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/common.cc old mode 100644 new mode 100755 similarity index 80% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/common.cc rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/common.cc index 6fe2ed4d..2f44a912 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/common.cc +++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/common.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include "arrow/memory_pool.h" @@ -90,9 +91,15 @@ 
@@ -90,9 +91,15 @@
 class PythonErrorDetail : public StatusDetail {
   std::string ToString() const override {
     // This is simple enough not to need the GIL
-    const auto ty = reinterpret_cast<const PyTypeObject*>(exc_type_.obj());
-    // XXX Should we also print traceback?
-    return std::string("Python exception: ") + ty->tp_name;
+    Result<std::string> result = FormatImpl();
+
+    if (result.ok()) {
+      return result.ValueOrDie();
+    } else {
+      // Fallback to just the exception type
+      const auto ty = reinterpret_cast<const PyTypeObject*>(exc_type_.obj());
+      return std::string("Python exception: ") + ty->tp_name;
+    }
   }

   void RestorePyError() const {
@@ -131,6 +138,42 @@ class PythonErrorDetail : public StatusDetail {
   }

  protected:
+  Result<std::string> FormatImpl() const {
+    PyAcquireGIL lock;
+
+    // Use traceback.format_exception()
+    OwnedRef traceback_module;
+    RETURN_NOT_OK(internal::ImportModule("traceback", &traceback_module));
+
+    OwnedRef fmt_exception;
+    RETURN_NOT_OK(internal::ImportFromModule(traceback_module.obj(), "format_exception",
+                                             &fmt_exception));
+
+    OwnedRef formatted;
+    formatted.reset(PyObject_CallFunctionObjArgs(fmt_exception.obj(), exc_type_.obj(),
+                                                 exc_value_.obj(), exc_traceback_.obj(),
+                                                 NULL));
+    RETURN_IF_PYERROR();
+
+    std::stringstream ss;
+    ss << "Python exception: ";
+    Py_ssize_t num_lines = PySequence_Length(formatted.obj());
+    RETURN_IF_PYERROR();
+
+    for (Py_ssize_t i = 0; i < num_lines; ++i) {
+      Py_ssize_t line_size;
+
+      PyObject* line = PySequence_GetItem(formatted.obj(), i);
+      RETURN_IF_PYERROR();
+
+      const char* data = PyUnicode_AsUTF8AndSize(line, &line_size);
+      RETURN_IF_PYERROR();
+
+      ss << std::string_view(data, line_size);
+    }
+    return ss.str();
+  }
+
   PythonErrorDetail() = default;

   OwnedRefNoGIL exc_type_, exc_value_, exc_traceback_;
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/common.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/common.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/common.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/common.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/csv.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/csv.cc
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/csv.cc
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/csv.cc
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/csv.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/csv.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/csv.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/csv.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/datetime.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/datetime.cc
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/datetime.cc
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/datetime.cc
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/datetime.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/datetime.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/datetime.h
rename to
cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/datetime.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/decimal.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/decimal.cc old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/decimal.cc rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/decimal.cc diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/decimal.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/decimal.h old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/decimal.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/decimal.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/deserialize.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/deserialize.cc old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/deserialize.cc rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/deserialize.cc diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/deserialize.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/deserialize.h old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/deserialize.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/deserialize.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/extension_type.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/extension_type.cc old mode 100644 new mode 100755 similarity index 99% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/extension_type.cc rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/extension_type.cc index 3ccc171c..be66b4a1 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/extension_type.cc +++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/extension_type.cc @@ -72,7 +72,7 @@ PyObject* DeserializeExtInstance(PyObject* type_class, static const char* kExtensionName = "arrow.py_extension_type"; -std::string PyExtensionType::ToString() const { +std::string PyExtensionType::ToString(bool show_metadata) const { PyAcquireGIL lock; std::stringstream ss; diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/extension_type.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/extension_type.h old mode 100644 new mode 100755 similarity index 97% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/extension_type.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/extension_type.h index e433d9ac..e6523824 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/extension_type.h +++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/extension_type.h @@ -33,7 +33,7 @@ class ARROW_PYTHON_EXPORT PyExtensionType : public ExtensionType { // Implement extensionType API std::string extension_name() const override { return extension_name_; } - std::string ToString() const override; + std::string ToString(bool show_metadata = false) const override; bool ExtensionEquals(const ExtensionType& other) const override; diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/filesystem.cc 
b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/filesystem.cc old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/filesystem.cc rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/filesystem.cc diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/filesystem.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/filesystem.h old mode 100644 new mode 100755 similarity index 90% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/filesystem.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/filesystem.h index 003fd5cb..194b226a --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/filesystem.h +++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/filesystem.h @@ -26,9 +26,7 @@ #include "arrow/python/visibility.h" #include "arrow/util/macros.h" -namespace arrow { -namespace py { -namespace fs { +namespace arrow::py::fs { class ARROW_PYTHON_EXPORT PyFileSystemVtable { public: @@ -83,16 +81,24 @@ class ARROW_PYTHON_EXPORT PyFileSystem : public arrow::fs::FileSystem { bool Equals(const FileSystem& other) const override; + /// \cond FALSE + using FileSystem::CreateDir; + using FileSystem::DeleteDirContents; + using FileSystem::GetFileInfo; + using FileSystem::OpenAppendStream; + using FileSystem::OpenOutputStream; + /// \endcond + Result GetFileInfo(const std::string& path) override; Result> GetFileInfo( const std::vector& paths) override; Result> GetFileInfo( const arrow::fs::FileSelector& select) override; - Status CreateDir(const std::string& path, bool recursive = true) override; + Status CreateDir(const std::string& path, bool recursive) override; Status DeleteDir(const std::string& path) override; - Status DeleteDirContents(const std::string& path, bool missing_dir_ok = false) override; + Status DeleteDirContents(const std::string& path, bool missing_dir_ok) override; Status DeleteRootDirContents() override; Status DeleteFile(const std::string& path) override; @@ -107,10 +113,10 @@ class ARROW_PYTHON_EXPORT PyFileSystem : public arrow::fs::FileSystem { const std::string& path) override; Result> OpenOutputStream( const std::string& path, - const std::shared_ptr& metadata = {}) override; + const std::shared_ptr& metadata) override; Result> OpenAppendStream( const std::string& path, - const std::shared_ptr& metadata = {}) override; + const std::shared_ptr& metadata) override; Result NormalizePath(std::string path) override; @@ -121,6 +127,4 @@ class ARROW_PYTHON_EXPORT PyFileSystem : public arrow::fs::FileSystem { PyFileSystemVtable vtable_; }; -} // namespace fs -} // namespace py -} // namespace arrow +} // namespace arrow::py::fs diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/flight.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/flight.cc old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/flight.cc rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/flight.cc diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/flight.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/flight.h old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/flight.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/flight.h diff --git 
a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/gdb.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/gdb.cc old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/gdb.cc rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/gdb.cc diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/gdb.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/gdb.h old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/gdb.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/gdb.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/helpers.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/helpers.cc old mode 100644 new mode 100755 similarity index 99% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/helpers.cc rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/helpers.cc index c266abc1..2c86c86a --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/helpers.cc +++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/helpers.cc @@ -63,6 +63,8 @@ std::shared_ptr GetPrimitiveType(Type::type type) { GET_PRIMITIVE_TYPE(STRING, utf8); GET_PRIMITIVE_TYPE(LARGE_BINARY, large_binary); GET_PRIMITIVE_TYPE(LARGE_STRING, large_utf8); + GET_PRIMITIVE_TYPE(BINARY_VIEW, binary_view); + GET_PRIMITIVE_TYPE(STRING_VIEW, utf8_view); GET_PRIMITIVE_TYPE(INTERVAL_MONTH_DAY_NANO, month_day_nano_interval); default: return nullptr; diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/helpers.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/helpers.h old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/helpers.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/helpers.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/inference.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/inference.cc old mode 100644 new mode 100755 similarity index 99% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/inference.cc rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/inference.cc index 9537aec5..10116f9a --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/inference.cc +++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/inference.cc @@ -468,10 +468,7 @@ class TypeInferrer { if (numpy_dtype_count_ > 0) { // All NumPy scalars and Nones/nulls if (numpy_dtype_count_ + none_count_ == total_count_) { - std::shared_ptr type; - RETURN_NOT_OK(NumPyDtypeToArrow(numpy_unifier_.current_dtype(), &type)); - *out = type; - return Status::OK(); + return NumPyDtypeToArrow(numpy_unifier_.current_dtype()).Value(out); } // The "bad path": data contains a mix of NumPy scalars and diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/inference.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/inference.h old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/inference.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/inference.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/init.cc 
b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/init.cc
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/init.cc
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/init.cc
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/init.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/init.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/init.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/init.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/io.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/io.cc
old mode 100644
new mode 100755
similarity index 96%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/io.cc
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/io.cc
index 43f8297c..197f8b9d
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/io.cc
+++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/io.cc
@@ -92,9 +92,12 @@ class PythonFile {
   Status Seek(int64_t position, int whence) {
     RETURN_NOT_OK(CheckClosed());

+    // NOTE: `long long` is at least 64 bits in the C standard, the cast below is
+    // therefore safe.
+
     // whence: 0 for relative to start of file, 2 for end of file
-    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "seek", "(ni)",
-                                               static_cast<Py_ssize_t>(position), whence);
+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "seek", "(Li)",
+                                               static_cast<long long>(position), whence);
     Py_XDECREF(result);
     PY_RETURN_IF_ERROR(StatusCode::IOError);
     return Status::OK();
@@ -103,16 +106,16 @@ class PythonFile {
   Status Read(int64_t nbytes, PyObject** out) {
     RETURN_NOT_OK(CheckClosed());

-    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read", "(n)",
-                                               static_cast<Py_ssize_t>(nbytes));
+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read", "(L)",
+                                               static_cast<long long>(nbytes));
     PY_RETURN_IF_ERROR(StatusCode::IOError);
     *out = result;
     return Status::OK();
   }

   Status ReadBuffer(int64_t nbytes, PyObject** out) {
-    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read_buffer", "(n)",
-                                               static_cast<Py_ssize_t>(nbytes));
+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read_buffer", "(L)",
+                                               static_cast<long long>(nbytes));
     PY_RETURN_IF_ERROR(StatusCode::IOError);
     *out = result;
     return Status::OK();
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/io.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/io.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/io.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/io.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/ipc.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/ipc.cc
new file mode 100755
index 00000000..0ed15224
--- /dev/null
+++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/ipc.cc
@@ -0,0 +1,133 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "ipc.h"
+
+#include <memory>
+
+#include "arrow/compute/cast.h"
+#include "arrow/python/pyarrow.h"
+
+namespace arrow {
+namespace py {
+
+PyRecordBatchReader::PyRecordBatchReader() {}
+
+Status PyRecordBatchReader::Init(std::shared_ptr<Schema> schema, PyObject* iterable) {
+  schema_ = std::move(schema);
+
+  iterator_.reset(PyObject_GetIter(iterable));
+  return CheckPyError();
+}
+
+std::shared_ptr<Schema> PyRecordBatchReader::schema() const { return schema_; }
+
+Status PyRecordBatchReader::ReadNext(std::shared_ptr<RecordBatch>* batch) {
+  PyAcquireGIL lock;
+
+  if (!iterator_) {
+    // End of stream
+    batch->reset();
+    return Status::OK();
+  }
+
+  OwnedRef py_batch(PyIter_Next(iterator_.obj()));
+  if (!py_batch) {
+    RETURN_IF_PYERROR();
+    // End of stream
+    batch->reset();
+    iterator_.reset();
+    return Status::OK();
+  }
+
+  return unwrap_batch(py_batch.obj()).Value(batch);
+}
+
+Result<std::shared_ptr<RecordBatchReader>> PyRecordBatchReader::Make(
+    std::shared_ptr<Schema> schema, PyObject* iterable) {
+  auto reader = std::shared_ptr<PyRecordBatchReader>(new PyRecordBatchReader());
+  RETURN_NOT_OK(reader->Init(std::move(schema), iterable));
+  return reader;
+}
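The new ipc.cc continues below with `CastingRecordBatchReader`, a `RecordBatchReader` decorator that validates castability once in `Init()` and then casts every batch in `ReadNext()`. A usage sketch from the C++ side (hypothetical caller, not part of the patch; `source` and `target_schema` are assumed to exist and to differ only in castable column types):

```cpp
#include <memory>

#include "arrow/python/ipc.h"
#include "arrow/record_batch.h"
#include "arrow/result.h"
#include "arrow/status.h"

arrow::Status ConsumeCasted(std::shared_ptr<arrow::RecordBatchReader> source,
                            std::shared_ptr<arrow::Schema> target_schema) {
  // Make() fails up front if any source column cannot be cast to the target,
  // mirroring the CanCast() loop in CastingRecordBatchReader::Init() below.
  ARROW_ASSIGN_OR_RAISE(auto reader, arrow::py::CastingRecordBatchReader::Make(
                                         std::move(source), target_schema));
  std::shared_ptr<arrow::RecordBatch> batch;
  while (true) {
    ARROW_RETURN_NOT_OK(reader->ReadNext(&batch));
    if (!batch) break;  // end of stream
    // ... consume `batch`, which now conforms to `target_schema` ...
  }
  return reader->Close();
}
```

Note the design choice visible in `ReadNext()` below: casts use `compute::CastOptions::Safe()`, and a batch containing nulls is rejected outright when the target field is non-nullable rather than silently producing invalid data.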
+
+CastingRecordBatchReader::CastingRecordBatchReader() = default;
+
+Status CastingRecordBatchReader::Init(std::shared_ptr<RecordBatchReader> parent,
+                                      std::shared_ptr<Schema> schema) {
+  std::shared_ptr<Schema> src = parent->schema();
+
+  // The check for names has already been done in Python where it's easier to
+  // generate a nice error message.
+  int num_fields = schema->num_fields();
+  if (src->num_fields() != num_fields) {
+    return Status::Invalid("Number of fields not equal");
+  }
+
+  // Ensure all columns can be cast before succeeding
+  for (int i = 0; i < num_fields; i++) {
+    if (!compute::CanCast(*src->field(i)->type(), *schema->field(i)->type())) {
+      return Status::TypeError("Field ", i, " cannot be cast from ",
+                               src->field(i)->type()->ToString(), " to ",
+                               schema->field(i)->type()->ToString());
+    }
+  }
+
+  parent_ = std::move(parent);
+  schema_ = std::move(schema);
+
+  return Status::OK();
+}
+
+std::shared_ptr<Schema> CastingRecordBatchReader::schema() const { return schema_; }
+
+Status CastingRecordBatchReader::ReadNext(std::shared_ptr<RecordBatch>* batch) {
+  std::shared_ptr<RecordBatch> out;
+  ARROW_RETURN_NOT_OK(parent_->ReadNext(&out));
+  if (!out) {
+    batch->reset();
+    return Status::OK();
+  }
+
+  auto num_columns = out->num_columns();
+  auto options = compute::CastOptions::Safe();
+  ArrayVector columns(num_columns);
+  for (int i = 0; i < num_columns; i++) {
+    const Array& src = *out->column(i);
+    if (!schema_->field(i)->nullable() && src.null_count() > 0) {
+      return Status::Invalid(
+          "Can't cast array that contains nulls to non-nullable field at index ", i);
+    }
+
+    ARROW_ASSIGN_OR_RAISE(columns[i],
+                          compute::Cast(src, schema_->field(i)->type(), options));
+  }
+
+  *batch = RecordBatch::Make(schema_, out->num_rows(), std::move(columns));
+  return Status::OK();
+}
+
+Result<std::shared_ptr<RecordBatchReader>> CastingRecordBatchReader::Make(
+    std::shared_ptr<RecordBatchReader> parent, std::shared_ptr<Schema> schema) {
+  auto reader =
+      std::shared_ptr<CastingRecordBatchReader>(new CastingRecordBatchReader());
+  ARROW_RETURN_NOT_OK(reader->Init(parent, schema));
+  return reader;
+}
+
+Status CastingRecordBatchReader::Close() { return parent_->Close(); }
+
+}  // namespace py
+}  // namespace arrow
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/ipc.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/ipc.h
old mode 100644
new mode 100755
similarity index 73%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/ipc.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/ipc.h
index 92232ed8..2c16d8c9
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/ipc.h
+++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/ipc.h
@@ -48,5 +48,25 @@ class ARROW_PYTHON_EXPORT PyRecordBatchReader : public RecordBatchReader {
   OwnedRefNoGIL iterator_;
 };

+class ARROW_PYTHON_EXPORT CastingRecordBatchReader : public RecordBatchReader {
+ public:
+  std::shared_ptr<Schema> schema() const override;
+
+  Status ReadNext(std::shared_ptr<RecordBatch>* batch) override;
+
+  static Result<std::shared_ptr<RecordBatchReader>> Make(
+      std::shared_ptr<RecordBatchReader> parent, std::shared_ptr<Schema> schema);
+
+  Status Close() override;
+
+ protected:
+  CastingRecordBatchReader();
+
+  Status Init(std::shared_ptr<RecordBatchReader> parent, std::shared_ptr<Schema> schema);
+
+  std::shared_ptr<RecordBatchReader> parent_;
+  std::shared_ptr<Schema> schema_;
+};
+
 }  // namespace py
 }  // namespace arrow
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/iterators.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/iterators.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/iterators.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/iterators.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/lib.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/lib.h
old mode 100644
new mode 100755
similarity index 100%
rename from
cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/lib.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/lib.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/lib_api.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/lib_api.h old mode 100644 new mode 100755 similarity index 51% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/lib_api.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/lib_api.h index 12bb219b..6c4fee27 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/lib_api.h +++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/lib_api.h @@ -1,4 +1,4 @@ -/* Generated by Cython 3.0.8 */ +/* Generated by Cython 3.0.10 */ #ifndef __PYX_HAVE_API__pyarrow__lib #define __PYX_HAVE_API__pyarrow__lib @@ -102,9 +102,9 @@ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_table)(PyObject *) = 0; #define pyarrow_is_table __pyx_api_f_7pyarrow_3lib_pyarrow_is_table static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_batch)(PyObject *) = 0; #define pyarrow_is_batch __pyx_api_f_7pyarrow_3lib_pyarrow_is_batch -#ifndef __PYX_HAVE_RT_ImportFunction_3_0_8 -#define __PYX_HAVE_RT_ImportFunction_3_0_8 -static int __Pyx_ImportFunction_3_0_8(PyObject *module, const char *funcname, void (**f)(void), const char *sig) { +#ifndef __PYX_HAVE_RT_ImportFunction_3_0_10 +#define __PYX_HAVE_RT_ImportFunction_3_0_10 +static int __Pyx_ImportFunction_3_0_10(PyObject *module, const char *funcname, void (**f)(void), const char *sig) { PyObject *d = 0; PyObject *cobj = 0; union { @@ -144,53 +144,53 @@ static int import_pyarrow__lib(void) { PyObject *module = 0; module = PyImport_ImportModule("pyarrow.lib"); if (!module) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "box_memory_pool", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_box_memory_pool, "PyObject *( arrow::MemoryPool *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer, "PyObject *(std::shared_ptr< arrow::Buffer> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_resizable_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_resizable_buffer, "PyObject *(std::shared_ptr< arrow::ResizableBuffer> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type, "PyObject *(std::shared_ptr< arrow::DataType> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field, "PyObject *(std::shared_ptr< arrow::Field> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema, "PyObject *(std::shared_ptr< arrow::Schema> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar, "PyObject *(std::shared_ptr< arrow::Scalar> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array, "PyObject *(std::shared_ptr< arrow::Array> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array, "PyObject *(std::shared_ptr< 
arrow::ChunkedArray> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_coo_tensor, "PyObject *(std::shared_ptr< arrow::SparseCOOTensor> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csc_matrix, "PyObject *(std::shared_ptr< arrow::SparseCSCMatrix> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_sparse_csf_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csf_tensor, "PyObject *(std::shared_ptr< arrow::SparseCSFTensor> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csr_matrix, "PyObject *(std::shared_ptr< arrow::SparseCSRMatrix> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor, "PyObject *(std::shared_ptr< arrow::Tensor> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch, "PyObject *(std::shared_ptr< arrow::RecordBatch> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_wrap_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table, "PyObject *(std::shared_ptr< arrow::Table> const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_unwrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_buffer, "std::shared_ptr< arrow::Buffer> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_unwrap_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_data_type, "std::shared_ptr< arrow::DataType> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_unwrap_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_field, "std::shared_ptr< arrow::Field> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_unwrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_schema, "std::shared_ptr< arrow::Schema> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_unwrap_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_scalar, "std::shared_ptr< arrow::Scalar> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_unwrap_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_array, "std::shared_ptr< arrow::Array> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_unwrap_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_chunked_array, "std::shared_ptr< arrow::ChunkedArray> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_unwrap_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_coo_tensor, "std::shared_ptr< arrow::SparseCOOTensor> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_unwrap_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csc_matrix, "std::shared_ptr< arrow::SparseCSCMatrix> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_unwrap_sparse_csf_tensor", (void 
(**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csf_tensor, "std::shared_ptr< arrow::SparseCSFTensor> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_unwrap_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csr_matrix, "std::shared_ptr< arrow::SparseCSRMatrix> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_unwrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor, "std::shared_ptr< arrow::Tensor> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_unwrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_batch, "std::shared_ptr< arrow::RecordBatch> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_unwrap_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table, "std::shared_ptr< arrow::Table> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_internal_check_status", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status, "int (arrow::Status const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_internal_convert_status", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_internal_convert_status, "PyObject *(arrow::Status const &)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer, "int (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_data_type, "int (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_metadata", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_metadata, "int (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_field, "int (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_schema, "int (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_array, "int (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_chunked_array, "int (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_scalar, "int (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor, "int (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_coo_tensor, "int (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csr_matrix, "int (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csc_matrix, "int (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_sparse_csf_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csf_tensor, "int (PyObject *)") < 0) goto bad; - if 
(__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_table, "int (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction_3_0_8(module, "pyarrow_is_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_batch, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "box_memory_pool", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_box_memory_pool, "PyObject *( arrow::MemoryPool *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer, "PyObject *(std::shared_ptr< arrow::Buffer> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_resizable_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_resizable_buffer, "PyObject *(std::shared_ptr< arrow::ResizableBuffer> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type, "PyObject *(std::shared_ptr< arrow::DataType> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field, "PyObject *(std::shared_ptr< arrow::Field> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema, "PyObject *(std::shared_ptr< arrow::Schema> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar, "PyObject *(std::shared_ptr< arrow::Scalar> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array, "PyObject *(std::shared_ptr< arrow::Array> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array, "PyObject *(std::shared_ptr< arrow::ChunkedArray> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_coo_tensor, "PyObject *(std::shared_ptr< arrow::SparseCOOTensor> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csc_matrix, "PyObject *(std::shared_ptr< arrow::SparseCSCMatrix> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_sparse_csf_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csf_tensor, "PyObject *(std::shared_ptr< arrow::SparseCSFTensor> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csr_matrix, "PyObject *(std::shared_ptr< arrow::SparseCSRMatrix> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor, "PyObject *(std::shared_ptr< arrow::Tensor> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch, "PyObject *(std::shared_ptr< arrow::RecordBatch> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_wrap_table", (void 
(**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table, "PyObject *(std::shared_ptr< arrow::Table> const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_unwrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_buffer, "std::shared_ptr< arrow::Buffer> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_unwrap_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_data_type, "std::shared_ptr< arrow::DataType> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_unwrap_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_field, "std::shared_ptr< arrow::Field> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_unwrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_schema, "std::shared_ptr< arrow::Schema> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_unwrap_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_scalar, "std::shared_ptr< arrow::Scalar> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_unwrap_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_array, "std::shared_ptr< arrow::Array> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_unwrap_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_chunked_array, "std::shared_ptr< arrow::ChunkedArray> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_unwrap_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_coo_tensor, "std::shared_ptr< arrow::SparseCOOTensor> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_unwrap_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csc_matrix, "std::shared_ptr< arrow::SparseCSCMatrix> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_unwrap_sparse_csf_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csf_tensor, "std::shared_ptr< arrow::SparseCSFTensor> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_unwrap_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csr_matrix, "std::shared_ptr< arrow::SparseCSRMatrix> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_unwrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor, "std::shared_ptr< arrow::Tensor> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_unwrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_batch, "std::shared_ptr< arrow::RecordBatch> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_unwrap_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table, "std::shared_ptr< arrow::Table> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_internal_check_status", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status, "int (arrow::Status const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_internal_convert_status", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_internal_convert_status, "PyObject *(arrow::Status const &)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_buffer", (void 
(**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_data_type, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_metadata", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_metadata, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_field, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_schema, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_array, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_chunked_array, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_scalar, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_coo_tensor, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csr_matrix, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csc_matrix, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_sparse_csf_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csf_tensor, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_table, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_10(module, "pyarrow_is_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_batch, "int (PyObject *)") < 0) goto bad; Py_DECREF(module); module = 0; return 0; bad: diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_convert.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_convert.cc old mode 100644 new mode 100755 similarity index 90% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_convert.cc rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_convert.cc index 49706807..5fd2cb51 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_convert.cc +++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_convert.cc @@ -46,7 +46,7 @@ NumPyBuffer::NumPyBuffer(PyObject* ao) : Buffer(nullptr, 0) { PyArrayObject* ndarray = reinterpret_cast(ao); auto ptr = reinterpret_cast(PyArray_DATA(ndarray)); data_ = const_cast(ptr); - size_ = PyArray_SIZE(ndarray) * PyArray_DESCR(ndarray)->elsize; + size_ = PyArray_NBYTES(ndarray); capacity_ = size_; is_mutable_ = !!(PyArray_FLAGS(ndarray) & NPY_ARRAY_WRITEABLE); } @@ -59,12 +59,11 @@ NumPyBuffer::~NumPyBuffer() { #define 
TO_ARROW_TYPE_CASE(NPY_NAME, FACTORY) \
   case NPY_##NPY_NAME:                \
-    *out = FACTORY();                 \
-    break;
+    return FACTORY();

 namespace {

-Status GetTensorType(PyObject* dtype, std::shared_ptr<DataType>* out) {
+Result<std::shared_ptr<DataType>> GetTensorType(PyObject* dtype) {
   if (!PyObject_TypeCheck(dtype, &PyArrayDescr_Type)) {
     return Status::TypeError("Did not pass numpy.dtype object");
   }
@@ -84,11 +83,8 @@ Status GetTensorType(PyObject* dtype, std::shared_ptr<DataType>* out) {
     TO_ARROW_TYPE_CASE(FLOAT16, float16);
     TO_ARROW_TYPE_CASE(FLOAT32, float32);
     TO_ARROW_TYPE_CASE(FLOAT64, float64);
-    default: {
-      return Status::NotImplemented("Unsupported numpy type ", descr->type_num);
-    }
   }
-  return Status::OK();
+  return Status::NotImplemented("Unsupported numpy type ", descr->type_num);
 }

 Status GetNumPyType(const DataType& type, int* type_num) {
@@ -120,15 +116,21 @@
 }

 }  // namespace

-Status NumPyDtypeToArrow(PyObject* dtype, std::shared_ptr<DataType>* out) {
+Result<std::shared_ptr<DataType>> NumPyScalarToArrowDataType(PyObject* scalar) {
+  PyArray_Descr* descr = PyArray_DescrFromScalar(scalar);
+  OwnedRef descr_ref(reinterpret_cast<PyObject*>(descr));
+  return NumPyDtypeToArrow(descr);
+}
+
+Result<std::shared_ptr<DataType>> NumPyDtypeToArrow(PyObject* dtype) {
   if (!PyObject_TypeCheck(dtype, &PyArrayDescr_Type)) {
     return Status::TypeError("Did not pass numpy.dtype object");
   }
   PyArray_Descr* descr = reinterpret_cast<PyArray_Descr*>(dtype);
-  return NumPyDtypeToArrow(descr, out);
+  return NumPyDtypeToArrow(descr);
 }

-Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr<DataType>* out) {
+Result<std::shared_ptr<DataType>> NumPyDtypeToArrow(PyArray_Descr* descr) {
   int type_num = fix_numpy_type_num(descr->type_num);

   switch (type_num) {
@@ -148,23 +150,18 @@ Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr<DataType>* out) {
     TO_ARROW_TYPE_CASE(UNICODE, utf8);
     case NPY_DATETIME: {
       auto date_dtype =
-          reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
+          reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(descr));
       switch (date_dtype->meta.base) {
         case NPY_FR_s:
-          *out = timestamp(TimeUnit::SECOND);
-          break;
+          return timestamp(TimeUnit::SECOND);
         case NPY_FR_ms:
-          *out = timestamp(TimeUnit::MILLI);
-          break;
+          return timestamp(TimeUnit::MILLI);
         case NPY_FR_us:
-          *out = timestamp(TimeUnit::MICRO);
-          break;
+          return timestamp(TimeUnit::MICRO);
         case NPY_FR_ns:
-          *out = timestamp(TimeUnit::NANO);
-          break;
+          return timestamp(TimeUnit::NANO);
         case NPY_FR_D:
-          *out = date32();
-          break;
+          return date32();
         case NPY_FR_GENERIC:
           return Status::NotImplemented("Unbound or generic datetime64 time unit");
         default:
@@ -173,32 +170,25 @@ Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr<DataType>* out) {
     } break;
     case NPY_TIMEDELTA: {
       auto timedelta_dtype =
-          reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
+          reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(descr));
       switch (timedelta_dtype->meta.base) {
         case NPY_FR_s:
-          *out = duration(TimeUnit::SECOND);
-          break;
+          return duration(TimeUnit::SECOND);
         case NPY_FR_ms:
-          *out = duration(TimeUnit::MILLI);
-          break;
+          return duration(TimeUnit::MILLI);
         case NPY_FR_us:
-          *out = duration(TimeUnit::MICRO);
-          break;
+          return duration(TimeUnit::MICRO);
         case NPY_FR_ns:
-          *out = duration(TimeUnit::NANO);
-          break;
+          return duration(TimeUnit::NANO);
         case NPY_FR_GENERIC:
           return Status::NotImplemented("Unbound or generic timedelta64 time unit");
         default:
           return Status::NotImplemented("Unsupported timedelta64 time unit");
       }
     } break;
-    default: {
-      return Status::NotImplemented("Unsupported numpy type ", descr->type_num);
-    }
   }
-  return Status::OK();
+  return Status::NotImplemented("Unsupported numpy type ", descr->type_num);
 }
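A recurring pattern in this upgrade: out-parameter conversion helpers move to `arrow::Result`-returning signatures, as in the `NumPyDtypeToArrow` overloads above. A hypothetical call site (not from the patch; function name assumed) contrasting the two styles:

```cpp
#include "arrow/python/numpy_convert.h"
#include "arrow/result.h"
#include "arrow/status.h"

arrow::Status InspectDtype(PyObject* dtype) {
  // pyarrow 15 style: Status plus out-parameter.
  //   std::shared_ptr<arrow::DataType> type;
  //   RETURN_NOT_OK(arrow::py::NumPyDtypeToArrow(dtype, &type));

  // pyarrow 16 style: the value travels inside arrow::Result, and
  // ARROW_ASSIGN_OR_RAISE propagates any error status automatically.
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::DataType> type,
                        arrow::py::NumPyDtypeToArrow(dtype));
  return type ? arrow::Status::OK()
              : arrow::Status::Invalid("conversion produced no type");
}
```

The same mechanical rewrite explains the `ARROW_ASSIGN_OR_RAISE` churn in the tensor-conversion hunks that follow.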
#undef TO_ARROW_TYPE_CASE
@@ -230,9 +220,8 @@ Status NdarrayToTensor(MemoryPool* pool, PyObject* ao,
     strides[i] = array_strides[i];
   }

-  std::shared_ptr<DataType> type;
-  RETURN_NOT_OK(
-      GetTensorType(reinterpret_cast<PyObject*>(PyArray_DESCR(ndarray)), &type));
+  ARROW_ASSIGN_OR_RAISE(
+      auto type, GetTensorType(reinterpret_cast<PyObject*>(PyArray_DESCR(ndarray))));
   *out = std::make_shared<Tensor>(type, data, shape, strides, dim_names);
   return Status::OK();
 }
@@ -435,9 +424,9 @@ Status NdarraysToSparseCOOTensor(MemoryPool* pool, PyObject* data_ao, PyObject*
   PyArrayObject* ndarray_data = reinterpret_cast<PyArrayObject*>(data_ao);
   std::shared_ptr<Buffer> data = std::make_shared<NumPyBuffer>(data_ao);
-  std::shared_ptr<DataType> type_data;
-  RETURN_NOT_OK(GetTensorType(reinterpret_cast<PyObject*>(PyArray_DESCR(ndarray_data)),
-                              &type_data));
+  ARROW_ASSIGN_OR_RAISE(
+      auto type_data,
+      GetTensorType(reinterpret_cast<PyObject*>(PyArray_DESCR(ndarray_data))));

   std::shared_ptr<Tensor> coords;
   RETURN_NOT_OK(NdarrayToTensor(pool, coords_ao, {}, &coords));
@@ -462,9 +451,9 @@ Status NdarraysToSparseCSXMatrix(MemoryPool* pool, PyObject* data_ao, PyObject*
   PyArrayObject* ndarray_data = reinterpret_cast<PyArrayObject*>(data_ao);
   std::shared_ptr<Buffer> data = std::make_shared<NumPyBuffer>(data_ao);
-  std::shared_ptr<DataType> type_data;
-  RETURN_NOT_OK(GetTensorType(reinterpret_cast<PyObject*>(PyArray_DESCR(ndarray_data)),
-                              &type_data));
+  ARROW_ASSIGN_OR_RAISE(
+      auto type_data,
+      GetTensorType(reinterpret_cast<PyObject*>(PyArray_DESCR(ndarray_data))));

   std::shared_ptr<Tensor> indptr, indices;
   RETURN_NOT_OK(NdarrayToTensor(pool, indptr_ao, {}, &indptr));
@@ -491,9 +480,9 @@ Status NdarraysToSparseCSFTensor(MemoryPool* pool, PyObject* data_ao, PyObject*
   const int ndim = static_cast<int>(shape.size());
   PyArrayObject* ndarray_data = reinterpret_cast<PyArrayObject*>(data_ao);
   std::shared_ptr<Buffer> data = std::make_shared<NumPyBuffer>(data_ao);
-  std::shared_ptr<DataType> type_data;
-  RETURN_NOT_OK(GetTensorType(reinterpret_cast<PyObject*>(PyArray_DESCR(ndarray_data)),
-                              &type_data));
+  ARROW_ASSIGN_OR_RAISE(
+      auto type_data,
+      GetTensorType(reinterpret_cast<PyObject*>(PyArray_DESCR(ndarray_data))));

   std::vector<std::shared_ptr<Tensor>> indptr(ndim - 1);
   std::vector<std::shared_ptr<Tensor>> indices(ndim);
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_convert.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_convert.h
old mode 100644
new mode 100755
similarity index 94%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_convert.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_convert.h
index 10451077..2d1086e1
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_convert.h
+++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_convert.h
@@ -49,9 +49,11 @@ class ARROW_PYTHON_EXPORT NumPyBuffer : public Buffer {
 };

 ARROW_PYTHON_EXPORT
-Status NumPyDtypeToArrow(PyObject* dtype, std::shared_ptr<DataType>* out);
+Result<std::shared_ptr<DataType>> NumPyDtypeToArrow(PyObject* dtype);
 ARROW_PYTHON_EXPORT
-Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr<DataType>* out);
+Result<std::shared_ptr<DataType>> NumPyDtypeToArrow(PyArray_Descr* descr);
+ARROW_PYTHON_EXPORT
+Result<std::shared_ptr<DataType>> NumPyScalarToArrowDataType(PyObject* scalar);

 ARROW_PYTHON_EXPORT
 Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, const std::vector<std::string>& dim_names,
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_internal.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_internal.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_internal.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_internal.h
diff --git
a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_interop.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_interop.h old mode 100644 new mode 100755 similarity index 92% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_interop.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_interop.h index ce7baed2..7ea7d6e1 --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_interop.h +++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_interop.h @@ -67,6 +67,13 @@ #define NPY_INT32_IS_INT 0 #endif +// Backported NumPy 2 API (can be removed if numpy 2 is required) +#if NPY_ABI_VERSION < 0x02000000 +#define PyDataType_ELSIZE(descr) ((descr)->elsize) +#define PyDataType_C_METADATA(descr) ((descr)->c_metadata) +#define PyDataType_FIELDS(descr) ((descr)->fields) +#endif + namespace arrow { namespace py { diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_to_arrow.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_to_arrow.cc old mode 100644 new mode 100755 similarity index 96% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_to_arrow.cc rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_to_arrow.cc index 2727ce32..460b1d0c --- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_to_arrow.cc +++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_to_arrow.cc @@ -196,7 +196,7 @@ class NumPyConverter { mask_ = reinterpret_cast(mo); } length_ = static_cast(PyArray_SIZE(arr_)); - itemsize_ = static_cast(PyArray_DESCR(arr_)->elsize); + itemsize_ = static_cast(PyArray_ITEMSIZE(arr_)); stride_ = static_cast(PyArray_STRIDES(arr_)[0]); } @@ -296,7 +296,7 @@ class NumPyConverter { PyArrayObject* mask_; int64_t length_; int64_t stride_; - int itemsize_; + int64_t itemsize_; bool from_pandas_; compute::CastOptions cast_options_; @@ -462,8 +462,7 @@ template inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { RETURN_NOT_OK(PrepareInputData(data)); - std::shared_ptr input_type; - RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); + ARROW_ASSIGN_OR_RAISE(auto input_type, NumPyDtypeToArrow(dtype_)); if (!input_type->Equals(*type_)) { RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, type_, @@ -479,7 +478,8 @@ inline Status NumPyConverter::ConvertData(std::shared_ptr* d RETURN_NOT_OK(PrepareInputData(data)); - auto date_dtype = reinterpret_cast(dtype_->c_metadata); + auto date_dtype = + reinterpret_cast(PyDataType_C_METADATA(dtype_)); if (dtype_->type_num == NPY_DATETIME) { // If we have inbound datetime64[D] data, this needs to be downcasted // separately here from int64_t to int32_t, because this data is not @@ -490,7 +490,7 @@ inline Status NumPyConverter::ConvertData(std::shared_ptr* d Status s = StaticCastBuffer(**data, length_, pool_, data); RETURN_NOT_OK(s); } else { - RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); + ARROW_ASSIGN_OR_RAISE(input_type, NumPyDtypeToArrow(dtype_)); if (!input_type->Equals(*type_)) { // The null bitmap was already computed in VisitNative() RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, @@ -498,7 +498,7 @@ inline Status NumPyConverter::ConvertData(std::shared_ptr* d } } } else { - RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); + ARROW_ASSIGN_OR_RAISE(input_type, 
NumPyDtypeToArrow(dtype_)); if (!input_type->Equals(*type_)) { RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, type_, cast_options_, pool_, data)); @@ -515,7 +515,8 @@ inline Status NumPyConverter::ConvertData(std::shared_ptr* d RETURN_NOT_OK(PrepareInputData(data)); - auto date_dtype = reinterpret_cast(dtype_->c_metadata); + auto date_dtype = + reinterpret_cast(PyDataType_C_METADATA(dtype_)); if (dtype_->type_num == NPY_DATETIME) { // If we have inbound datetime64[D] data, this needs to be downcasted // separately here from int64_t to int32_t, because this data is not @@ -531,7 +532,7 @@ inline Status NumPyConverter::ConvertData(std::shared_ptr* d } *data = std::move(result); } else { - RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); + ARROW_ASSIGN_OR_RAISE(input_type, NumPyDtypeToArrow(dtype_)); if (!input_type->Equals(*type_)) { // The null bitmap was already computed in VisitNative() RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, @@ -539,7 +540,7 @@ inline Status NumPyConverter::ConvertData(std::shared_ptr* d } } } else { - RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); + ARROW_ASSIGN_OR_RAISE(input_type, NumPyDtypeToArrow(dtype_)); if (!input_type->Equals(*type_)) { RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, type_, cast_options_, pool_, data)); @@ -629,11 +630,11 @@ namespace { // NumPy unicode is UCS4/UTF32 always constexpr int kNumPyUnicodeSize = 4; -Status AppendUTF32(const char* data, int itemsize, int byteorder, +Status AppendUTF32(const char* data, int64_t itemsize, int byteorder, ::arrow::internal::ChunkedStringBuilder* builder) { // The binary \x00\x00\x00\x00 indicates a nul terminator in NumPy unicode, // so we need to detect that here to truncate if necessary. Yep. 
- int actual_length = 0; + Py_ssize_t actual_length = 0; for (; actual_length < itemsize / kNumPyUnicodeSize; ++actual_length) { const char* code_point = data + actual_length * kNumPyUnicodeSize; if ((*code_point == '\0') && (*(code_point + 1) == '\0') && @@ -706,7 +707,7 @@ Status NumPyConverter::Visit(const StringType& type) { auto AppendNonNullValue = [&](const uint8_t* data) { if (is_binary_type) { if (ARROW_PREDICT_TRUE(util::ValidateUTF8(data, itemsize_))) { - return builder.Append(data, itemsize_); + return builder.Append(data, static_cast(itemsize_)); } else { return Status::Invalid("Encountered non-UTF8 binary value: ", HexEncode(data, itemsize_)); @@ -751,12 +752,13 @@ Status NumPyConverter::Visit(const StructType& type) { PyAcquireGIL gil_lock; // Create converters for each struct type field - if (dtype_->fields == NULL || !PyDict_Check(dtype_->fields)) { + if (PyDataType_FIELDS(dtype_) == NULL || !PyDict_Check(PyDataType_FIELDS(dtype_))) { return Status::TypeError("Expected struct array"); } for (auto field : type.fields()) { - PyObject* tup = PyDict_GetItemString(dtype_->fields, field->name().c_str()); + PyObject* tup = + PyDict_GetItemString(PyDataType_FIELDS(dtype_), field->name().c_str()); if (tup == NULL) { return Status::Invalid("Missing field '", field->name(), "' in struct array"); } diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_to_arrow.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_to_arrow.h old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_to_arrow.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_to_arrow.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/parquet_encryption.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/parquet_encryption.cc old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/parquet_encryption.cc rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/parquet_encryption.cc diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/parquet_encryption.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/parquet_encryption.h old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/parquet_encryption.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/parquet_encryption.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/pch.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/pch.h old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/pch.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/pch.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/platform.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/platform.h old mode 100644 new mode 100755 similarity index 100% rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/platform.h rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/platform.h diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/pyarrow.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/pyarrow.cc old mode 100644 new mode 100755 similarity index 100% rename from 
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_to_arrow.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_to_arrow.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/numpy_to_arrow.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/numpy_to_arrow.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/parquet_encryption.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/parquet_encryption.cc
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/parquet_encryption.cc
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/parquet_encryption.cc
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/parquet_encryption.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/parquet_encryption.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/parquet_encryption.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/parquet_encryption.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/pch.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/pch.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/pch.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/pch.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/platform.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/platform.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/platform.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/platform.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/pyarrow.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/pyarrow.cc
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/pyarrow.cc
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/pyarrow.cc
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/pyarrow.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/pyarrow.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/pyarrow.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/pyarrow.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/pyarrow_api.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/pyarrow_api.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/pyarrow_api.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/pyarrow_api.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/pyarrow_lib.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/pyarrow_lib.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/pyarrow_lib.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/pyarrow_lib.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/python_test.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/python_test.cc
old mode 100644
new mode 100755
similarity index 98%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/python_test.cc
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/python_test.cc
index 01ab8a30..746bf410
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/python_test.cc
+++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/python_test.cc
@@ -174,10 +174,14 @@ Status TestOwnedRefNoGILMoves() {
   }
 }
 
-std::string FormatPythonException(const std::string& exc_class_name) {
+std::string FormatPythonException(const std::string& exc_class_name,
+                                  const std::string& exc_value) {
   std::stringstream ss;
   ss << "Python exception: ";
   ss << exc_class_name;
+  ss << ": ";
+  ss << exc_value;
+  ss << "\n";
   return ss.str();
 }
 
@@ -205,7 +209,8 @@ Status TestCheckPyErrorStatus() {
   }
 
   PyErr_SetString(PyExc_TypeError, "some error");
-  ASSERT_OK(check_error(st, "some error", FormatPythonException("TypeError")));
+  ASSERT_OK(
+      check_error(st, "some error", FormatPythonException("TypeError", "some error")));
   ASSERT_TRUE(st.IsTypeError());
 
   PyErr_SetString(PyExc_ValueError, "some error");
@@ -223,7 +228,8 @@ Status TestCheckPyErrorStatus() {
   }
 
   PyErr_SetString(PyExc_NotImplementedError, "some error");
-  ASSERT_OK(check_error(st, "some error", FormatPythonException("NotImplementedError")));
+  ASSERT_OK(check_error(st, "some error",
+                        FormatPythonException("NotImplementedError", "some error")));
   ASSERT_TRUE(st.IsNotImplemented());
 
   // No override if a specific status code is given
@@ -246,7 +252,8 @@ Status TestCheckPyErrorStatusNoGIL() {
   lock.release();
   ASSERT_TRUE(st.IsUnknownError());
   ASSERT_EQ(st.message(), "zzzt");
-  ASSERT_EQ(st.detail()->ToString(), FormatPythonException("ZeroDivisionError"));
+  ASSERT_EQ(st.detail()->ToString(),
+            FormatPythonException("ZeroDivisionError", "zzzt"));
   return Status::OK();
 }
}
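Note: the test updates above track a change in how Arrow formats the detail attached to a Status built from a live Python error: the exception value is now appended after the class name, with a trailing newline. Reproducing just the helper (its body is taken directly from the hunk; the main function is illustrative) shows the expected shape:

#include <iostream>
#include <sstream>
#include <string>

std::string FormatPythonException(const std::string& exc_class_name,
                                  const std::string& exc_value) {
  std::stringstream ss;
  ss << "Python exception: " << exc_class_name << ": " << exc_value << "\n";
  return ss.str();
}

int main() {
  // Prints: Python exception: TypeError: some error
  std::cout << FormatPythonException("TypeError", "some error");
  return 0;
}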
@@ -257,7 +264,7 @@ Status TestRestorePyErrorBasics() {
   ASSERT_FALSE(PyErr_Occurred());
   ASSERT_TRUE(st.IsUnknownError());
   ASSERT_EQ(st.message(), "zzzt");
-  ASSERT_EQ(st.detail()->ToString(), FormatPythonException("ZeroDivisionError"));
+  ASSERT_EQ(st.detail()->ToString(), FormatPythonException("ZeroDivisionError", "zzzt"));
 
   RestorePyError(st);
   ASSERT_TRUE(PyErr_Occurred());
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/python_test.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/python_test.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/python_test.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/python_test.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/python_to_arrow.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/python_to_arrow.cc
old mode 100644
new mode 100755
similarity index 95%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/python_to_arrow.cc
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/python_to_arrow.cc
index 23b92598..79da4756
--- a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/python_to_arrow.cc
+++ b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/python_to_arrow.cc
@@ -386,8 +386,7 @@ class PyValue {
       }
     } else if (PyArray_CheckAnyScalarExact(obj)) {
       // validate that the numpy scalar has np.datetime64 dtype
-      std::shared_ptr<DataType> numpy_type;
-      RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(obj), &numpy_type));
+      ARROW_ASSIGN_OR_RAISE(auto numpy_type, NumPyScalarToArrowDataType(obj));
       if (!numpy_type->Equals(*type)) {
         return Status::NotImplemented("Expected np.datetime64 but got: ",
                                       numpy_type->ToString());
@@ -406,7 +405,7 @@ class PyValue {
     RETURN_NOT_OK(PopulateMonthDayNano::Field(
         obj, &output.months, &found_attrs));
     // on relativeoffset weeks is a property calculated from days. On
-    // DateOffset is is a field on its own. timedelta doesn't have a weeks
+    // DateOffset is a field on its own. timedelta doesn't have a weeks
     // attribute.
     PyObject* pandas_date_offset_type = internal::BorrowPandasDataOffsetType();
     bool is_date_offset = pandas_date_offset_type == (PyObject*)Py_TYPE(obj);
@@ -466,8 +465,7 @@ class PyValue {
       }
     } else if (PyArray_CheckAnyScalarExact(obj)) {
       // validate that the numpy scalar has np.datetime64 dtype
-      std::shared_ptr<DataType> numpy_type;
-      RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(obj), &numpy_type));
+      ARROW_ASSIGN_OR_RAISE(auto numpy_type, NumPyScalarToArrowDataType(obj));
       if (!numpy_type->Equals(*type)) {
         return Status::NotImplemented("Expected np.timedelta64 but got: ",
                                       numpy_type->ToString());
@@ -488,6 +486,10 @@ class PyValue {
     return view.ParseString(obj);
   }
 
+  static Status Convert(const BinaryViewType*, const O&, I obj, PyBytesView& view) {
+    return view.ParseString(obj);
+  }
+
   static Status Convert(const FixedSizeBinaryType* type, const O&, I obj,
                         PyBytesView& view) {
     ARROW_RETURN_NOT_OK(view.ParseString(obj));
@@ -501,8 +503,8 @@ class PyValue {
   }
 
   template <typename T>
-  static enable_if_string<T, Status> Convert(const T*, const O& options, I obj,
-                                             PyBytesView& view) {
+  static enable_if_t<is_string_type<T>::value || is_string_view_type<T>::value, Status>
+  Convert(const T*, const O& options, I obj, PyBytesView& view) {
     if (options.strict) {
       // Strict conversion, force output to be unicode / utf8 and validate that
       // any binary values are utf8
@@ -572,20 +574,15 @@ struct PyConverterTrait;
 
 template <typename T>
 struct PyConverterTrait<
-    T,
-    enable_if_t<(!is_nested_type<T>::value && !is_interval_type<T>::value &&
-                 !is_extension_type<T>::value && !is_binary_view_like_type<T>::value) ||
-                std::is_same<T, MonthDayNanoIntervalType>::value>> {
+    T, enable_if_t<(!is_nested_type<T>::value && !is_interval_type<T>::value &&
+                    !is_extension_type<T>::value) ||
+                   std::is_same<T, MonthDayNanoIntervalType>::value>> {
   using type = PyPrimitiveConverter<T>;
 };
 
 template <typename T>
-struct PyConverterTrait<T, enable_if_binary_view_like<T>> {
-  // not implemented
-};
-
-template <typename T>
-struct PyConverterTrait<T, enable_if_list_like<T>> {
+struct PyConverterTrait<
+    T, enable_if_t<is_list_like_type<T>::value || is_list_view_type<T>::value>> {
   using type = PyListConverter<T>;
 };
 
@@ -701,11 +698,22 @@ class PyPrimitiveConverter
   PyBytesView view_;
 };
 
+template <typename T, typename Enable = void>
+struct OffsetTypeTrait {
+  using type = typename T::offset_type;
+};
+
 template <typename T>
-class PyPrimitiveConverter<T, enable_if_base_binary<T>>
+struct OffsetTypeTrait<T, enable_if_binary_view_like<T>> {
+  using type = int64_t;
+};
+
+template <typename T>
+class PyPrimitiveConverter<
+    T, enable_if_t<is_base_binary_type<T>::value || is_binary_view_like_type<T>::value>>
     : public PrimitiveConverter<T, PyConverter> {
  public:
-  using OffsetType = typename T::offset_type;
+  using OffsetType = typename OffsetTypeTrait<T>::type;
 
   Status Append(PyObject* value) override {
     if (PyValue::IsNull(this->options_, value)) {
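Note: the converter-trait hunks above fold the new view types into the existing SFINAE dispatch instead of keeping a dead "not implemented" specialization, and OffsetTypeTrait papers over the fact that view types carry no offset_type member. A self-contained sketch of that trait-fallback pattern with toy types (std::enable_if_t standing in for Arrow's enable_if_t alias; none of these names are the vendored ones):

#include <cstdint>
#include <type_traits>

struct StringType { using offset_type = int32_t; };
struct LargeStringType { using offset_type = int64_t; };
struct StringViewType {};  // view types have no offset_type member

template <typename T>
struct IsViewType : std::false_type {};
template <>
struct IsViewType<StringViewType> : std::true_type {};

// Primary trait: take offset_type from the type itself...
template <typename T, typename Enable = void>
struct OffsetTypeTrait { using type = typename T::offset_type; };

// ...and fall back to int64_t for view types, mirroring the hunk above.
template <typename T>
struct OffsetTypeTrait<T, std::enable_if_t<IsViewType<T>::value>> {
  using type = int64_t;
};

static_assert(std::is_same_v<OffsetTypeTrait<StringType>::type, int32_t>);
static_assert(std::is_same_v<OffsetTypeTrait<LargeStringType>::type, int64_t>);
static_assert(std::is_same_v<OffsetTypeTrait<StringViewType>::type, int64_t>);

int main() { return 0; }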
@@ -796,7 +804,6 @@ class PyListConverter : public ListConverter<T, PyConverter> {
       return this->list_builder_->AppendNull();
     }
 
-    RETURN_NOT_OK(this->list_builder_->Append());
     if (PyArray_Check(value)) {
       RETURN_NOT_OK(AppendNdarray(value));
     } else if (PySequence_Check(value)) {
@@ -817,6 +824,21 @@ class PyListConverter : public ListConverter<T, PyConverter> {
   }
 
  protected:
+  // MapType does not support args in the Append() method
+  Status AppendTo(const MapType*, int64_t size) { return this->list_builder_->Append(); }
+
+  // FixedSizeListType does not support args in the Append() method
+  Status AppendTo(const FixedSizeListType*, int64_t size) {
+    return this->list_builder_->Append();
+  }
+
+  // ListType requires the size argument in the Append() method
+  // in order to be convertible to a ListViewType. ListViewType
+  // requires the size argument in the Append() method always.
+  Status AppendTo(const BaseListType*, int64_t size) {
+    return this->list_builder_->Append(true, size);
+  }
+
   Status ValidateBuilder(const MapType*) {
     if (this->list_builder_->key_builder()->null_count() > 0) {
       return Status::Invalid("Invalid Map: key field cannot contain null values");
     }
@@ -829,11 +851,14 @@ class PyListConverter : public ListConverter<T, PyConverter> {
   Status AppendSequence(PyObject* value) {
     int64_t size = static_cast<int64_t>(PySequence_Size(value));
+    RETURN_NOT_OK(AppendTo(this->list_type_, size));
     RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size));
     return this->value_converter_->Extend(value, size);
   }
 
   Status AppendIterable(PyObject* value) {
+    auto size = static_cast<int64_t>(PyObject_Size(value));
+    RETURN_NOT_OK(AppendTo(this->list_type_, size));
     PyObject* iterator = PyObject_GetIter(value);
     OwnedRef iter_ref(iterator);
     while (PyObject* item = PyIter_Next(iterator)) {
@@ -850,6 +875,7 @@ class PyListConverter : public ListConverter<T, PyConverter> {
       return Status::Invalid("Can only convert 1-dimensional array values");
     }
     const int64_t size = PyArray_SIZE(ndarray);
+    RETURN_NOT_OK(AppendTo(this->list_type_, size));
     RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size));
 
     const auto value_type = this->value_converter_->builder()->type();
@@ -1043,7 +1069,8 @@ class PyStructConverter : public StructConverter<PyConverter> {
       case KeyKind::BYTES:
         return AppendDict(dict, bytes_field_names_.obj());
       default:
-        RETURN_NOT_OK(InferKeyKind(PyDict_Items(dict)));
+        OwnedRef item_ref(PyDict_Items(dict));
+        RETURN_NOT_OK(InferKeyKind(item_ref.obj()));
         if (key_kind_ == KeyKind::UNKNOWN) {
           // was unable to infer the type which means that all keys are absent
           return AppendEmpty();
@@ -1089,6 +1116,7 @@ class PyStructConverter : public StructConverter<PyConverter> {
   Result<std::pair<PyObject*, PyObject*>> GetKeyValuePair(PyObject* seq, int index) {
     PyObject* pair = PySequence_GetItem(seq, index);
     RETURN_IF_PYERROR();
+    OwnedRef pair_ref(pair);  // ensure reference count is decreased at scope end
     if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
       return internal::InvalidType(pair, "was expecting tuple of (key, value) pair");
     }
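Note: the last two hunks above plug reference leaks: PyDict_Items() and PySequence_GetItem() both return new references, which the previous code never released on those paths. Arrow's OwnedRef is an RAII guard for exactly this case; the sketch below is a simplified stand-in for the real class (which, to my understanding, lives in the vendored arrow/python/common.h), with a hypothetical FirstPairIsTuple helper to show the usage:

#include <Python.h>

class OwnedRef {
 public:
  explicit OwnedRef(PyObject* obj = nullptr) : obj_(obj) {}
  OwnedRef(const OwnedRef&) = delete;
  OwnedRef& operator=(const OwnedRef&) = delete;
  ~OwnedRef() { Py_XDECREF(obj_); }  // drop the reference at scope exit
  PyObject* obj() const { return obj_; }

 private:
  PyObject* obj_;
};

// Usage mirroring the fixed code paths: the items list and the pair are
// released even when the function returns early.
bool FirstPairIsTuple(PyObject* dict) {
  OwnedRef items(PyDict_Items(dict));  // new reference, now owned
  if (items.obj() == nullptr) return false;
  OwnedRef pair(PySequence_GetItem(items.obj(), 0));  // also a new reference
  return pair.obj() != nullptr && PyTuple_Check(pair.obj());
}

int main() {
  Py_Initialize();
  OwnedRef dict(Py_BuildValue("{s:i}", "k", 1));  // {"k": 1}
  bool ok = FirstPairIsTuple(dict.obj());
  Py_Finalize();
  return ok ? 0 : 1;
}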
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/python_to_arrow.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/python_to_arrow.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/python_to_arrow.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/python_to_arrow.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/serialize.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/serialize.cc
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/serialize.cc
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/serialize.cc
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/serialize.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/serialize.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/serialize.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/serialize.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/type_traits.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/type_traits.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/type_traits.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/type_traits.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/udf.cc b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/udf.cc
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/udf.cc
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/udf.cc
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/udf.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/udf.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/udf.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/udf.h
diff --git a/cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/visibility.h b/cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/visibility.h
old mode 100644
new mode 100755
similarity index 100%
rename from cpp/csp/python/adapters/vendored/pyarrow-15.0.0/arrow/python/visibility.h
rename to cpp/csp/python/adapters/vendored/pyarrow-16.0.0/arrow/python/visibility.h
diff --git a/setup.py b/setup.py
index d8ec7577..a3f3c6e7 100644
--- a/setup.py
+++ b/setup.py
@@ -38,6 +38,7 @@
 args = ["install"]
 if VCPKG_TRIPLET is not None:
     args.append(f"--triplet={VCPKG_TRIPLET}")
+
 if os.name == "nt":
     subprocess.call(["bootstrap-vcpkg.bat"], cwd="vcpkg", shell=True)
     subprocess.call(["vcpkg.bat"] + args, cwd="vcpkg", shell=True)
diff --git a/vcpkg.json b/vcpkg.json
index a54276c5..cb6bfdb1 100644
--- a/vcpkg.json
+++ b/vcpkg.json
@@ -25,7 +25,8 @@
   "overrides": [
     {
       "name": "arrow",
-      "version": "15.0.0"
+      "version": "16.0.0",
+      "port-version" : 1
     }
   ],
   "builtin-baseline": "04b0cf2b3fd1752d3c3db969cbc10ba0a4613cee"