-
Notifications
You must be signed in to change notification settings - Fork 3
Handle lz4 compression #30
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Hind-M
wants to merge
17
commits into
QuantStack:main
Choose a base branch
from
Hind-M:compr_ser
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
714d6c3
First attempt for compression in serialization (not tested and to be …
Hind-M 148aecb
Clean up and fetch
Hind-M 7a792b3
More cleanup
Hind-M cb9c563
Fix fetching lz4
Hind-M ecb67e2
Add lz4 targets to be exported
Hind-M 7ef77be
Add lz4 compression in deserialization
Hind-M 5fd55aa
Add owning_arrow_array_private_data
Hind-M 856bc7d
Factorize
Hind-M b83fe2b
Remove macro from template
Hind-M 25ae702
Add fct and remove cout
Hind-M dbd4e77
Move implementation details to cpp and add check boundaries
Hind-M 533b71f
Factorize more
Hind-M c5eb667
Fix conflicts and rework serialization with compression
Hind-M 66b7660
Add compression test
Hind-M 0d949aa
Minor changes
Hind-M a778fff
Rework compression and add tests
Hind-M 273fc24
Add missing header in test
Hind-M File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Find LZ4 library and headers | ||
|
||
# This module defines: | ||
# LZ4_FOUND - True if lz4 is found | ||
# LZ4_INCLUDE_DIRS - LZ4 include directories | ||
# LZ4_LIBRARIES - Libraries needed to use LZ4 | ||
# LZ4_VERSION - LZ4 version number | ||
# | ||
|
||
find_package(PkgConfig) | ||
if(PKG_CONFIG_FOUND) | ||
pkg_check_modules(LZ4 QUIET liblz4) | ||
if(NOT LZ4_FOUND) | ||
message(STATUS "Did not find 'liblz4.pc', trying 'lz4.pc'") | ||
pkg_check_modules(LZ4 QUIET lz4) | ||
endif() | ||
endif() | ||
|
||
find_path(LZ4_INCLUDE_DIR lz4.h) | ||
# HINTS ${LZ4_INCLUDEDIR} ${LZ4_INCLUDE_DIRS}) | ||
find_library(LZ4_LIBRARY NAMES lz4 liblz4) | ||
# HINTS ${LZ4_LIBDIR} ${LZ4_LIBRARY_DIRS}) | ||
|
||
include(FindPackageHandleStandardArgs) | ||
find_package_handle_standard_args(lz4 DEFAULT_MSG | ||
LZ4_LIBRARY LZ4_INCLUDE_DIR) | ||
mark_as_advanced(LZ4_INCLUDE_DIR LZ4_LIBRARY) | ||
|
||
set(LZ4_LIBRARIES ${LZ4_LIBRARY}) | ||
set(LZ4_INCLUDE_DIRS ${LZ4_INCLUDE_DIR}) | ||
|
||
if(LZ4_FOUND AND NOT TARGET lz4::lz4) | ||
add_library(lz4::lz4 UNKNOWN IMPORTED) | ||
set_target_properties(lz4::lz4 PROPERTIES | ||
IMPORTED_LOCATION "${LZ4_LIBRARIES}" | ||
INTERFACE_INCLUDE_DIRECTORIES "${LZ4_INCLUDE_DIRS}") | ||
if (NOT TARGET LZ4::LZ4 AND TARGET lz4::lz4) | ||
add_library(LZ4::LZ4 ALIAS lz4::lz4) | ||
endif () | ||
endif() | ||
|
||
#TODO add version? |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,86 @@ | ||
|
||
#pragma once | ||
|
||
#include <vector> | ||
#include <utility> | ||
|
||
#include <sparrow/c_interface.hpp> | ||
#include <sparrow/utils/contracts.hpp> | ||
|
||
#include "sparrow_ipc/config/config.hpp" | ||
#include "sparrow_ipc/arrow_interface/arrow_array/private_data.hpp" | ||
|
||
namespace sparrow_ipc | ||
{ | ||
[[nodiscard]] SPARROW_IPC_API ArrowArray make_non_owning_arrow_array( | ||
SPARROW_IPC_API void release_arrow_array_children_and_dictionary(ArrowArray* array); | ||
|
||
template <ArrowPrivateData T> | ||
void arrow_array_release(ArrowArray* array) | ||
{ | ||
SPARROW_ASSERT_TRUE(array != nullptr) | ||
SPARROW_ASSERT_TRUE(array->release == std::addressof(arrow_array_release<T>)) | ||
|
||
SPARROW_ASSERT_TRUE(array->private_data != nullptr); | ||
|
||
delete static_cast<T*>(array->private_data); | ||
array->private_data = nullptr; | ||
array->buffers = nullptr; // The buffers were deleted with the private data | ||
|
||
release_arrow_array_children_and_dictionary(array); | ||
array->release = nullptr; | ||
} | ||
|
||
template <ArrowPrivateData T, typename Arg> | ||
void fill_arrow_array( | ||
ArrowArray& array, | ||
int64_t length, | ||
int64_t null_count, | ||
int64_t offset, | ||
std::vector<std::uint8_t*>&& buffers, | ||
size_t children_count, | ||
ArrowArray** children, | ||
ArrowArray* dictionary | ||
); | ||
ArrowArray* dictionary, | ||
Arg&& private_data_arg | ||
) | ||
{ | ||
SPARROW_ASSERT_TRUE(length >= 0); | ||
SPARROW_ASSERT_TRUE(null_count >= -1); | ||
SPARROW_ASSERT_TRUE(offset >= 0); | ||
|
||
SPARROW_IPC_API void release_non_owning_arrow_array(ArrowArray* array); | ||
array.length = length; | ||
array.null_count = null_count; | ||
array.offset = offset; | ||
array.n_children = static_cast<int64_t>(children_count); | ||
array.children = children; | ||
array.dictionary = dictionary; | ||
|
||
SPARROW_IPC_API void fill_non_owning_arrow_array( | ||
ArrowArray& array, | ||
auto private_data = new T(std::forward<Arg>(private_data_arg)); | ||
array.private_data = private_data; | ||
array.n_buffers = private_data->n_buffers(); | ||
array.buffers = private_data->buffers_ptrs(); | ||
|
||
array.release = &arrow_array_release<T>; | ||
} | ||
|
||
template <ArrowPrivateData T, typename Arg> | ||
[[nodiscard]] ArrowArray make_arrow_array( | ||
int64_t length, | ||
int64_t null_count, | ||
int64_t offset, | ||
std::vector<std::uint8_t*>&& buffers, | ||
size_t children_count, | ||
ArrowArray** children, | ||
ArrowArray* dictionary | ||
); | ||
} | ||
ArrowArray* dictionary, | ||
Arg&& private_data_arg | ||
) | ||
{ | ||
ArrowArray array{}; | ||
fill_arrow_array<T>( | ||
array, | ||
length, | ||
null_count, | ||
offset, | ||
children_count, | ||
children, | ||
dictionary, | ||
std::forward<Arg>(private_data_arg) | ||
); | ||
return array; | ||
} | ||
} |
29 changes: 25 additions & 4 deletions
29
include/sparrow_ipc/arrow_interface/arrow_array/private_data.hpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,46 @@ | ||
#pragma once | ||
|
||
#include <concepts> | ||
#include <cstdint> | ||
#include <vector> | ||
|
||
#include "sparrow_ipc/config/config.hpp" | ||
|
||
namespace sparrow_ipc | ||
{ | ||
template <typename T> | ||
concept ArrowPrivateData = requires(T& t) | ||
{ | ||
{ t.buffers_ptrs() } -> std::same_as<const void**>; | ||
{ t.n_buffers() } -> std::convertible_to<std::size_t>; | ||
}; | ||
|
||
class owning_arrow_array_private_data | ||
{ | ||
public: | ||
|
||
explicit owning_arrow_array_private_data(std::vector<std::vector<std::uint8_t>>&& buffers); | ||
|
||
[[nodiscard]] SPARROW_IPC_API const void** buffers_ptrs() noexcept; | ||
[[nodiscard]] SPARROW_IPC_API std::size_t n_buffers() const noexcept; | ||
|
||
private: | ||
std::vector<std::vector<std::uint8_t>> m_buffers; | ||
std::vector<const void*> m_buffer_pointers; | ||
}; | ||
|
||
class non_owning_arrow_array_private_data | ||
{ | ||
public: | ||
|
||
explicit constexpr non_owning_arrow_array_private_data(std::vector<std::uint8_t*>&& buffers_pointers) | ||
: m_buffers_pointers(std::move(buffers_pointers)) | ||
: m_buffer_pointers(std::move(buffers_pointers)) | ||
{ | ||
} | ||
|
||
[[nodiscard]] SPARROW_IPC_API const void** buffers_ptrs() noexcept; | ||
[[nodiscard]] SPARROW_IPC_API std::size_t n_buffers() const noexcept; | ||
|
||
private: | ||
|
||
std::vector<std::uint8_t*> m_buffers_pointers; | ||
std::vector<std::uint8_t*> m_buffer_pointers; | ||
}; | ||
} |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, but if you want to efficiently handle the case where some buffers are compressed and others not, you may want a more flexible facility, e.g.:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(in the longer term, sparrow might want a reference-counted optionally-owning buffer class...)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
^^ @JohanMabille