diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 648ddcf..b6a1cc2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -47,10 +47,6 @@ jobs: cd iceberg_rust_ffi cargo build --release - - name: Set ICEBERG_RUST_LIB environment variable - if: inputs.build_rust - run: echo "ICEBERG_RUST_LIB=${{ github.workspace }}/iceberg_rust_ffi/target/release" >> $GITHUB_ENV - - name: Initialize containers uses: gacts/run-and-post-run@v1 with: @@ -99,9 +95,17 @@ jobs: ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - - name: Force recompile with custom library + + - name: Set JLL override preference for local Rust library if: inputs.build_rust - run: julia --project=. -e 'Base.compilecache(Base.identify_package("RustyIceberg"))' + run: | + julia --project=. -e 'using Libdl; using Preferences; lib_path = joinpath("${{ github.workspace }}/iceberg_rust_ffi/target/release", "libiceberg_rust_ffi." * Libdl.dlext); set_preferences!("iceberg_rust_ffi_jll", "libiceberg_rust_ffi_path" => lib_path; force=true); println("Set preference to: ", lib_path)' + + - name: Clear JLL override preference (use JLL package) + if: ${{ !inputs.build_rust }} + run: | + julia --project=. 
-e 'using Preferences; delete_preferences!("iceberg_rust_ffi_jll", "libiceberg_rust_ffi_path"; force=true)' + - uses: julia-actions/julia-runtest@v1 - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v3 diff --git a/.gitignore b/.gitignore index 2229295..9080477 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ iceberg_rust_ffi/integration_test **/*.dylib **/.claude **/.DS_Store -Manifest.toml \ No newline at end of file +Manifest.toml +LocalPreferences.toml \ No newline at end of file diff --git a/Makefile b/Makefile index 5c7625d..c151d0d 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: run-containers stop-containers build build-debug build-release test repl clean clean-all help +.PHONY: run-containers stop-containers build build-debug build-release test test-dev repl repl-dev set-local-lib clear-local-lib clean clean-all help # Rust library configuration RUST_FFI_DIR = iceberg_rust_ffi @@ -41,24 +41,50 @@ build-debug: build-release: $(MAKE) BUILD_TYPE=release build -# Run tests (requires .env file) -test: build +# Helper target: Set local library preference for development +set-local-lib: + @julia --project=. -e 'using Libdl; using Preferences; lib_path = joinpath("$(shell pwd)/$(TARGET_DIR)", "libiceberg_rust_ffi." * Libdl.dlext); set_preferences!("iceberg_rust_ffi_jll", "libiceberg_rust_ffi_path" => lib_path; force=true); println("Set local library preference to: ", lib_path)' + @echo "Julia will use this path after restarting/recompiling." + +# Helper target: Clear local library preference (use JLL default); force=true is needed so that a preference already written by set-local-lib can actually be removed +clear-local-lib: + @julia --project=. -e 'using Preferences; delete_preferences!("iceberg_rust_ffi_jll", "libiceberg_rust_ffi_path"; force=true)' + @echo "Cleared local library preference. Julia will use JLL artifact after restarting/recompiling." + +# Run tests with local build (for development) +test-dev: build set-local-lib + @if [ ! -f .env ]; then \ + echo "Error: .env file not found. 
Please create a .env file with required environment variables."; \ + exit 1; \ + fi + @set -a && . ./.env && set +a && \ + $(JULIA_THREADS_ENV) julia --project=. -e 'using Pkg; Pkg.test()' + +# Run tests with JLL package (production mode) +test: clear-local-lib @if [ ! -f .env ]; then \ echo "Error: .env file not found. Please create a .env file with required environment variables."; \ exit 1; \ fi @set -a && . ./.env && set +a && \ - export ICEBERG_RUST_LIB=$$(pwd)/$(TARGET_DIR) && \ - $(JULIA_THREADS_ENV) julia --project=. -e 'Base.compilecache(Base.identify_package("RustyIceberg")); using Pkg; Pkg.test()' + $(JULIA_THREADS_ENV) julia --project=. -e 'using Pkg; Pkg.test()' -# Start Julia REPL with environment configured (requires .env file) -repl: build +# Start Julia REPL with local build (for development) +repl-dev: build set-local-lib + @if [ ! -f .env ]; then \ + echo "Error: .env file not found. Please create a .env file with required environment variables."; \ + exit 1; \ + fi + @set -a && . ./.env && set +a && \ + $(JULIA_THREADS_ENV) julia --project=. + +# Start Julia REPL with JLL package (production mode) +repl: clear-local-lib @if [ ! -f .env ]; then \ echo "Error: .env file not found. Please create a .env file with required environment variables."; \ exit 1; \ fi @set -a && . ./.env && set +a && \ - export ICEBERG_RUST_LIB=$$(pwd)/$(TARGET_DIR) && \ $(JULIA_THREADS_ENV) julia --project=. 
# Clean build artifacts @@ -76,17 +102,28 @@ help: @echo " build - Build the Rust FFI library (use BUILD_TYPE=debug for debug build)" @echo " build-debug - Build the Rust FFI library in debug mode" @echo " build-release - Build the Rust FFI library in release mode" - @echo " test - Run Julia tests (requires .env file and runs build first)" - @echo " repl - Start Julia REPL with environment configured (requires .env file)" + @echo "" + @echo "Development targets (use local Rust build):" + @echo " test-dev - Run Julia tests with local Rust library" + @echo " repl-dev - Start Julia REPL with local Rust library" + @echo " set-local-lib - Set preference to use local Rust library" + @echo "" + @echo "Production targets (use JLL package):" + @echo " test - Run Julia tests with JLL package" + @echo " repl - Start Julia REPL with JLL package" + @echo " clear-local-lib - Clear local library preference (use JLL)" + @echo "" + @echo "Docker targets:" @echo " run-containers - Start docker containers" @echo " stop-containers - Stop docker containers" + @echo "" + @echo "Cleanup targets:" @echo " clean - Clean build artifacts" @echo " clean-all - Clean everything including target directory" @echo " help - Show this help message" @echo "" @echo "Examples:" - @echo " make test - Build in debug mode and run tests" - @echo " make BUILD_TYPE=release test - Build in release mode and run tests" - @echo " make build-release repl - Build in release mode and start REPL" - @echo " JULIA_NUM_THREADS=8 make test - Run tests with 8 Julia threads" - @echo " JULIA_NUM_THREADS=4 make repl - Start REPL with 4 Julia threads" \ No newline at end of file + @echo " make test-dev - Build and test with local Rust library" + @echo " make BUILD_TYPE=release test-dev - Build release and test with local library" + @echo " make test - Test with JLL package (production mode)" + @echo " JULIA_NUM_THREADS=8 make test-dev - Run dev tests with 8 Julia threads" \ No newline at end of file diff --git a/Project.toml 
b/Project.toml index b2a1fc8..3131be3 100644 --- a/Project.toml +++ b/Project.toml @@ -4,13 +4,13 @@ version = "0.3.3" [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" -Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +Preferences = "21216c6a-2e73-6563-6e65-726566657250" iceberg_rust_ffi_jll = "6bd5c94f-693c-53e3-983d-a09fad412f22" [compat] Arrow = "2.0" julia = "1.10" -Libdl = "1" +Preferences = "1.3" iceberg_rust_ffi_jll = "0.3" [extras] diff --git a/README.md b/README.md index 4592bcc..2b2532d 100644 --- a/README.md +++ b/README.md @@ -2,50 +2,81 @@ [![CI](https://github.com/RelationalAI/RustyIceberg.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/RelationalAI/RustyIceberg.jl/actions/workflows/CI.yml) -A Julia package that provides bindings to the Iceberg C API, allowing you to read Apache Iceberg tables directly from Julia. +A Julia package that provides bindings on top of the Iceberg Rust crate, allowing you to read Apache Iceberg tables directly from Julia. ## Overview -This package wraps the iceberg_rust_ffi interface with Julia bindings, providing both low-level C API access and high-level Julia interfaces for working with Iceberg tables. It supports reading data from Iceberg tables and provides an iterator interface over Arrow format data. - -## Features - -- **Low-level Rust FFI bindings**: Direct access to all Iceberg Rust FFI API functions -- **High-level Julia interface**: Easy-to-use `read_iceberg_table()` function that returns an iterator -- **Arrow integration**: Seamless iteration over Arrow.Table objects -- **Dynamic library loading**: Automatic loading and unloading of the C library -- **Memory management**: Proper cleanup of C resources -- **Error handling**: Comprehensive error reporting and handling -- **Iterator-based API**: Streaming of data +This package wraps the iceberg_rust_ffi interface with Julia bindings, providing Julia interfaces for working with Iceberg tables. 
It supports reading data from Iceberg tables in full-scan and incremental-scan modes. ## Installation - -1. Clone this repository: -```bash -git clone -``` - -2. Install the package in Julia: +1. Install the package in Julia: ```julia using Pkg -Pkg.add(path=".") +Pkg.add("RustyIceberg") ``` -3. Install dependencies: +2. Install dependencies: ```julia Pkg.instantiate() ``` ## Development -When working on RustyIceberg.jl, you can either use -[iceberg_rust_ffi_jll.jl](https://github.com/RelationalAI/iceberg_rust_ffi_jll.jl) or use -a local build of [iceberg_rust_ffi](https://github.com/RelationalAI/iceberg_rust_ffi). -When using a local build, set the environment variable `ICEBERG_RUST_LIB` to the directory -containing the build. For example, if you have the `iceberg_rust_ffi` repository at -`~/repos/iceberg_rust_ffi` and build the library by running `cargo build --release` from -the base of that repository, then you could use that local build by setting -ICEBERG_RUST_LIB="~/repos/iceberg_rust_ffi/target/release". +When working on RustyIceberg.jl, you can either use the precompiled +[iceberg_rust_ffi_jll.jl](https://github.com/JuliaBinaryWrappers/iceberg_rust_ffi_jll.jl) package +or use a local build of [iceberg_rust_ffi](./iceberg_rust_ffi/). + +### Custom JLL +If you want to test custom code, you can refer directly to a branch of this repo (or of a fork). However, if the change touches the FFI, you might want to build a custom JLL so that downstream projects don't need to have a Rust toolchain in their repository. The way we do it here is by mimicking the JuliaBinaryWrappers/iceberg_rust_ffi_jll.jl structure in https://github.com/RelationalAI/iceberg_rust_ffi_jll.jl/. All one has to do is invoke this [GitHub Action there](https://github.com/RelationalAI/iceberg_rust_ffi_jll.jl/actions/workflows/build-and-deploy.yml); follow the README in that repo for details. +This workflow will produce a new branch in that repo. 
Then, in your Julia project, simply refer to that repo with a repo-rev equal to the commit of the newly produced branch in that repo. + +### Using Local Builds + +To use a local Rust library build, set a preference using `Preferences.jl` with the **full path to the library file**: + +```julia +using Libdl, Preferences +lib_path = joinpath(expanduser("~/repos/iceberg_rust_ffi/target/release"), "libiceberg_rust_ffi." * Libdl.dlext) +set_preferences!("iceberg_rust_ffi_jll", "libiceberg_rust_ffi_path" => lib_path; force=true) +``` + +`Libdl.dlext` supplies the correct extension (without the leading dot) for your platform: +- macOS: `dylib` +- Linux: `so` +- Windows: `dll` + +**Note**: After setting preferences, you need to restart Julia or trigger package recompilation for changes to take effect. + +### Makefile Targets + +The project includes convenient Makefile targets for development: + +**Development mode (uses local Rust build):** +- `make test-dev` - Build Rust library and run tests with local build +- `make repl-dev` - Build Rust library and start REPL with local build +- `make set-local-lib` - Set preference to use local Rust library + +**Production mode (uses JLL package):** +- `make test` - Run tests with JLL package +- `make repl` - Start REPL with JLL package +- `make clear-local-lib` - Clear local library preference + +**Examples:** +```bash +# Development workflow +make test-dev # Test with local debug build +make BUILD_TYPE=release test-dev # Test with local release build + +# Switch back to JLL package +make test # Clears local preference and tests with JLL +``` + +To switch back to the JLL package, either run `make clear-local-lib` or manually delete the preference (`force=true` is required to remove a preference that is already set): + +```julia +using Preferences +delete_preferences!("iceberg_rust_ffi_jll", "libiceberg_rust_ffi_path"; force=true) +``` ## Prerequisites @@ -64,18 +95,13 @@ using Pkg Pkg.test("RustyIceberg") ``` -The tests replicate the functionality of the C integration test (`integration_test.c`) but using Julia bindings. 
Note that some tests may fail if S3 credentials are not configured or if the test data is not available. - -## Troubleshooting - -### Common Issues +## CI +CI runs with a custom iceberg_rust_ffi built from source. Releases run with the official JLL, which is the default; CI overrides the default using Preferences.jl. -1. **Library not found**: Ensure `libiceberg_rust_ffi.dylib` is in your library path -2. **S3 access denied**: Check your AWS credentials and permissions -3. **Memory errors**: Ensure you're not holding references to freed C objects +## Release -## Dependencies +### JLL Release +To make a JLL release, create a new PR in the JuliaPackaging/Yggdrasil repo, e.g. like [this one](https://github.com/JuliaPackaging/Yggdrasil/pull/12532/files). -- **Libdl**: For dynamic library loading -- **Arrow**: For Arrow format support -- **Test**: For testing (development dependency) \ No newline at end of file +### RustyIceberg Release +To create a new RustyIceberg release, simply bump the version in Project.toml, merge that into `main`, and then open that commit and comment on it like [here](https://github.com/RelationalAI/RustyIceberg.jl/commit/cbebb0e9611f70867e6ad2fbca0060a44345ae31#commitcomment-170551595). This will trigger an update in JuliaRegistries (should take ~20m), which will then invoke TagBot in this repository, which will also run CI tests with the official JLL. diff --git a/src/RustyIceberg.jl b/src/RustyIceberg.jl index 3bfbad5..add38c6 100644 --- a/src/RustyIceberg.jl +++ b/src/RustyIceberg.jl @@ -1,9 +1,7 @@ module RustyIceberg -using Base.Libc.Libdl: dlext using Base: @kwdef, @lock using Base.Threads: Atomic -using Libdl using Arrow using iceberg_rust_ffi_jll @@ -17,20 +15,10 @@ export scan!, next_batch, free_batch, free_stream const Option{T} = Union{T, Nothing} -const rust_lib = if haskey(ENV, "ICEBERG_RUST_LIB") - # For development, e.g. run `cargo build --release` and point to `target/release/` dir. 
- # Note this is set a precompilation time, as `ccall` needs this to be a `const`, - # so you need to restart Julia / recompile the package if you change it. - lib_path = realpath(joinpath(ENV["ICEBERG_RUST_LIB"], "libiceberg_rust_ffi.$(dlext)")) - @warn """ - Using unreleased iceberg_rust_ffi library: - $(repr(replace(lib_path, homedir() => "~"))) - This is only intended for local development and should not be used in production. - """ - lib_path -else - iceberg_rust_ffi_jll.libiceberg_rust_ffi -end +# Always use the JLL library - override via Preferences if needed for local development +# To use a local build, set the preference to the full path of the library file (not the directory): +# using Libdl, Preferences; set_preferences!("iceberg_rust_ffi_jll", "libiceberg_rust_ffi_path" => joinpath("/path/to/target/release", "libiceberg_rust_ffi." * Libdl.dlext); force=true) +const rust_lib = iceberg_rust_ffi_jll.libiceberg_rust_ffi """ Runtime configuration for the Iceberg library.