From 057c612be2cf3fdd9c3e42c4f1d214aa5e3522fa Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Tue, 17 Jun 2025 12:10:23 +0200 Subject: [PATCH 1/8] Generate headers with flatc from apache arrow schemas --- .gitignore | 4 +++ CMakeLists.txt | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.cpp | 11 +++++++ 3 files changed, 94 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 src/main.cpp diff --git a/.gitignore b/.gitignore index 259148f..e1ab4e6 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,7 @@ *.exe *.out *.app + +# Ignore build directory +/build/ +/build-*/ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..3857f5e --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,79 @@ +cmake_minimum_required(VERSION 3.28) + +project(sparrow-ipc CXX) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +#set(CMAKE_CXX_SCAN_FOR_MODULES OFF) # We don't use modules? + +set(SCHEMA_DIR ${CMAKE_SOURCE_DIR}/format) +set(FLATBUFFERS_GENERATED_DIR ${CMAKE_SOURCE_DIR}/src/generated) + +find_program(FLATC_EXECUTABLE flatc) + +if(NOT FLATC_EXECUTABLE) + message(FATAL_ERROR "flatc not found. Please install Flatbuffers.") +endif() + +# Fetch schemas from apache arrow +# TODO Automate this? or use our own? +set(SCHEMA_URLS + "https://raw.githubusercontent.com/apache/arrow/refs/heads/main/format/File.fbs" + "https://raw.githubusercontent.com/apache/arrow/refs/heads/main/format/Message.fbs" + "https://raw.githubusercontent.com/apache/arrow/refs/heads/main/format/Schema.fbs" + "https://raw.githubusercontent.com/apache/arrow/refs/heads/main/format/Tensor.fbs" + "https://raw.githubusercontent.com/apache/arrow/refs/heads/main/format/SparseTensor.fbs" + # TODO what about feather.fbs? +) + +file(MAKE_DIRECTORY ${SCHEMA_DIR}) + +# Download schemas +set(FLATBUFFERS_SCHEMAS "") +foreach(url IN LISTS SCHEMA_URLS) + get_filename_component(filename ${url} NAME) + message(STATUS "Downloading schema: ${url}") + file(DOWNLOAD ${url} ${SCHEMA_DIR}/${filename} + STATUS status + SHOW_PROGRESS) + list(APPEND FLATBUFFERS_SCHEMAS ${SCHEMA_DIR}/${filename}) +endforeach() + +# Generate Flatbuffers C++ headers from the schemas +file(MAKE_DIRECTORY ${FLATBUFFERS_GENERATED_DIR}) + +# Generate output files list +set(FLATBUFFERS_GENERATED_HEADERS "") +foreach(fbs_file IN LISTS FLATBUFFERS_SCHEMAS) + # Generate the corresponding header file name + get_filename_component(header_name ${fbs_file} NAME_WE) + list(APPEND FLATBUFFERS_GENERATED_HEADERS "${FLATBUFFERS_GENERATED_DIR}/${header_name}_generated.h") +endforeach() + +add_custom_command( + OUTPUT ${FLATBUFFERS_GENERATED_HEADERS} + COMMAND ${FLATC_EXECUTABLE} --cpp -o ${FLATBUFFERS_GENERATED_DIR} --cpp-std c++17 --scoped-enums ${FLATBUFFERS_SCHEMAS} + DEPENDS ${FLATBUFFERS_SCHEMAS} + COMMENT "Generating FlatBuffers C++ headers from schemas" +) + +add_custom_target(generate_flatbuffers_headers + DEPENDS ${FLATBUFFERS_GENERATED_HEADERS} +) + +# Interface target for generated headers +add_library(flatbuffers_interface INTERFACE) +target_include_directories(flatbuffers_interface INTERFACE ${FLATBUFFERS_GENERATED_DIR}) +add_dependencies(flatbuffers_interface generate_flatbuffers_headers) + +# Main executable target +# TODO change to example later +add_executable(sparrow-ipc src/main.cpp) +target_link_libraries(sparrow-ipc PRIVATE flatbuffers_interface) + +# Find and link FlatBuffers library +find_package(FlatBuffers REQUIRED) +target_link_libraries(sparrow-ipc PRIVATE flatbuffers::flatbuffers) + +add_dependencies(sparrow-ipc generate_flatbuffers_headers) + diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..d4e8b3e --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,11 @@ +#include + +#include "generated/File_generated.h" + +// TODO move this (i.e main) to a test? (or example?) +int main() +{ + + + return 0; +} From 26edb8633526bf20ca1d795bf2e0c11eadafb061 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Wed, 18 Jun 2025 10:36:14 +0200 Subject: [PATCH 2/8] Link sparrow --- CMakeLists.txt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3857f5e..8a96e3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,14 +66,13 @@ add_library(flatbuffers_interface INTERFACE) target_include_directories(flatbuffers_interface INTERFACE ${FLATBUFFERS_GENERATED_DIR}) add_dependencies(flatbuffers_interface generate_flatbuffers_headers) +find_package(FlatBuffers CONFIG REQUIRED) +find_package(sparrow CONFIG REQUIRED) + # Main executable target # TODO change to example later add_executable(sparrow-ipc src/main.cpp) -target_link_libraries(sparrow-ipc PRIVATE flatbuffers_interface) - -# Find and link FlatBuffers library -find_package(FlatBuffers REQUIRED) -target_link_libraries(sparrow-ipc PRIVATE flatbuffers::flatbuffers) +target_link_libraries(sparrow-ipc PRIVATE flatbuffers_interface flatbuffers::flatbuffers sparrow) add_dependencies(sparrow-ipc generate_flatbuffers_headers) From 2f5d6ebc9cb1527eb196438ebf67c08681910243 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Wed, 18 Jun 2025 10:45:59 +0200 Subject: [PATCH 3/8] Add dev environment file --- environment-dev.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 environment-dev.yml diff --git a/environment-dev.yml b/environment-dev.yml new file mode 100644 index 0000000..442ec9a --- /dev/null +++ b/environment-dev.yml @@ -0,0 +1,11 @@ +name: sparrow-ipc +channels: + - conda-forge +dependencies: + # Build dependencies + - cmake + - make # or ninja + - cxx-compiler + # Libraries dependencies + - flatbuffers + - sparrow From 20a7baaeaec0e7c8306328ad689ead098509266a Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Wed, 18 Jun 2025 10:54:05 +0200 Subject: [PATCH 4/8] Ignore all build dirs --- .gitignore | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index e1ab4e6..8e81a66 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,5 @@ *.out *.app -# Ignore build directory -/build/ -/build-*/ +# Build directories +/build*/ From 6b8b0a4c17dd9fa690a0c17658edc09458eeee7e Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Wed, 18 Jun 2025 13:56:20 +0200 Subject: [PATCH 5/8] Code review: put fbs schemas and generated headers in build --- CMakeLists.txt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a96e3c..da79f42 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,10 +4,10 @@ project(sparrow-ipc CXX) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) -#set(CMAKE_CXX_SCAN_FOR_MODULES OFF) # We don't use modules? +set(CMAKE_CXX_SCAN_FOR_MODULES OFF) -set(SCHEMA_DIR ${CMAKE_SOURCE_DIR}/format) -set(FLATBUFFERS_GENERATED_DIR ${CMAKE_SOURCE_DIR}/src/generated) +set(SCHEMA_DIR ${CMAKE_BINARY_DIR}/format) +set(FLATBUFFERS_GENERATED_DIR ${CMAKE_BINARY_DIR}/generated) find_program(FLATC_EXECUTABLE flatc) @@ -16,7 +16,6 @@ if(NOT FLATC_EXECUTABLE) endif() # Fetch schemas from apache arrow -# TODO Automate this? or use our own? set(SCHEMA_URLS "https://raw.githubusercontent.com/apache/arrow/refs/heads/main/format/File.fbs" "https://raw.githubusercontent.com/apache/arrow/refs/heads/main/format/Message.fbs" From 84936a0fcf8e3ebffdce1e9ddeab7ef52ab167f4 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Wed, 18 Jun 2025 16:21:20 +0200 Subject: [PATCH 6/8] Add test and more adjustments --- CMakeLists.txt | 15 ++++++++++++--- environment-dev.yml | 2 ++ src/main.cpp | 11 ----------- src/sparrow-ipc.cpp | 4 ++++ tests/CMakeLists.txt | 18 ++++++++++++++++++ tests/test.cpp | 25 +++++++++++++++++++++++++ 6 files changed, 61 insertions(+), 14 deletions(-) delete mode 100644 src/main.cpp create mode 100644 src/sparrow-ipc.cpp create mode 100644 tests/CMakeLists.txt create mode 100644 tests/test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index da79f42..0950944 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,11 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_SCAN_FOR_MODULES OFF) +# Build options +# ============= +OPTION(BUILD_TESTS "Build sparrow-ipc test suite" OFF) +MESSAGE(STATUS "🔧 Build tests: ${BUILD_TESTS}") + set(SCHEMA_DIR ${CMAKE_BINARY_DIR}/format) set(FLATBUFFERS_GENERATED_DIR ${CMAKE_BINARY_DIR}/generated) @@ -68,10 +73,14 @@ add_dependencies(flatbuffers_interface generate_flatbuffers_headers) find_package(FlatBuffers CONFIG REQUIRED) find_package(sparrow CONFIG REQUIRED) -# Main executable target -# TODO change to example later -add_executable(sparrow-ipc src/main.cpp) +# TODO Handle shared/static build later (after more code is available) +add_library(sparrow-ipc STATIC src/sparrow-ipc.cpp) target_link_libraries(sparrow-ipc PRIVATE flatbuffers_interface flatbuffers::flatbuffers sparrow) add_dependencies(sparrow-ipc generate_flatbuffers_headers) +if(BUILD_TESTS) + message(STATUS "🧪 Create tests targets") + enable_testing() + add_subdirectory(tests) +endif() diff --git a/environment-dev.yml b/environment-dev.yml index 442ec9a..b48e7f3 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -9,3 +9,5 @@ dependencies: # Libraries dependencies - flatbuffers - sparrow + # Tests + - doctest diff --git a/src/main.cpp b/src/main.cpp deleted file mode 100644 index d4e8b3e..0000000 --- a/src/main.cpp +++ /dev/null @@ -1,11 +0,0 @@ -#include - -#include "generated/File_generated.h" - -// TODO move this (i.e main) to a test? (or example?) -int main() -{ - - - return 0; -} diff --git a/src/sparrow-ipc.cpp b/src/sparrow-ipc.cpp new file mode 100644 index 0000000..7fefd21 --- /dev/null +++ b/src/sparrow-ipc.cpp @@ -0,0 +1,4 @@ +#include "sparrow/sparrow.hpp" + +#include "../generated/Schema_generated.h" + diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..bec125f --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.28) + +find_package(doctest CONFIG REQUIRED) + +set(test_target "test_sparrow_ipc_lib") + +add_executable(${test_target} test.cpp) +target_link_libraries(${test_target} + PRIVATE + sparrow-ipc + doctest::doctest +) +target_include_directories(${test_target} + PRIVATE + ${CMAKE_BINARY_DIR}/generated +) +add_dependencies(${test_target} generate_flatbuffers_headers) +add_test(NAME sparrow-ipc-tests COMMAND ${test_target}) diff --git a/tests/test.cpp b/tests/test.cpp new file mode 100644 index 0000000..853cc45 --- /dev/null +++ b/tests/test.cpp @@ -0,0 +1,25 @@ +#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN + +#include "sparrow/sparrow.hpp" +#include "doctest/doctest.h" + +#include "../generated/Schema_generated.h" + +// NOTE this is just testing sparrow internals usability, +// for now we are not testing anything with serialization/deserialization +TEST_CASE("Use sparrow primitive_array") +{ + namespace sp = sparrow; + + sp::primitive_array ar = { 1, 3, 5, 7, 9 }; + CHECK_EQ(ar.size(), 5); + + auto [arrow_array, arrow_schema] = sp::extract_arrow_structures(std::move(ar)); + CHECK_EQ(arrow_array.length, 5); + + // Serialize + // Deserialize + + arrow_array.release(&arrow_array); + arrow_schema.release(&arrow_schema); +} From bf39486ca3ad6e6de8a8cbb06041f845fb2853fe Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Thu, 19 Jun 2025 09:37:02 +0200 Subject: [PATCH 7/8] Add linux workflow --- .github/workflows/linux.yml | 38 +++++++++++++++++++++++++++++++++++++ environment-dev.yml | 3 ++- 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/linux.yml diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml new file mode 100644 index 0000000..2b50162 --- /dev/null +++ b/.github/workflows/linux.yml @@ -0,0 +1,38 @@ +name: Build and test sparrow-ipc + +on: + push: + branches: + - main + pull_request: + branches: + - main + +defaults: + run: + shell: bash -l -eo pipefail {0} + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Create build environment + uses: mamba-org/setup-micromamba@v2 + with: + environment-file: ./environment-dev.yml + environment-name: build_env + cache-environment: true + - name: Build sparrow-ipc + run: | + cmake -B build/ -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \ + -DCMAKE_PREFIX_PATH=$CONDA_PREFIX \ + -DBUILD_TESTS=ON + cmake --build build/ --parallel + - name: Run tests + run: | + cd build + ctest --output-on-failure diff --git a/environment-dev.yml b/environment-dev.yml index b48e7f3..05b16b6 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -4,7 +4,8 @@ channels: dependencies: # Build dependencies - cmake - - make # or ninja + - make + - ninja - cxx-compiler # Libraries dependencies - flatbuffers From 9604459b4a0d2b465eadbd34c3a1e20631b6eb27 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Thu, 19 Jun 2025 09:48:51 +0200 Subject: [PATCH 8/8] Change workflow trigger --- .github/workflows/linux.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 2b50162..0dc5106 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -1,12 +1,10 @@ name: Build and test sparrow-ipc on: - push: - branches: - - main + workflow_dispatch: pull_request: - branches: - - main + push: + branches: [main] defaults: run: