Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,6 @@ jobs:
cd iceberg_rust_ffi
cargo build --release

- name: Set ICEBERG_RUST_LIB environment variable
if: inputs.build_rust
run: echo "ICEBERG_RUST_LIB=${{ github.workspace }}/iceberg_rust_ffi/target/release" >> $GITHUB_ENV

- name: Initialize containers
uses: gacts/run-and-post-run@v1
with:
Expand Down Expand Up @@ -99,9 +95,17 @@ jobs:
${{ runner.os }}-

- uses: julia-actions/julia-buildpkg@v1
- name: Force recompile with custom library

- name: Set JLL override preference for local Rust library
if: inputs.build_rust
run: julia --project=. -e 'Base.compilecache(Base.identify_package("RustyIceberg"))'
run: |
julia --project=. -e 'using Libdl; using Preferences; lib_path = joinpath("${{ github.workspace }}/iceberg_rust_ffi/target/release", "libiceberg_rust_ffi." * Libdl.dlext); set_preferences!("iceberg_rust_ffi_jll", "libiceberg_rust_ffi_path" => lib_path; force=true); println("Set preference to: ", lib_path)'

- name: Clear JLL override preference (use JLL package)
if: ${{ !inputs.build_rust }}
run: |
julia --project=. -e 'using Preferences; delete_preferences!("iceberg_rust_ffi_jll", "libiceberg_rust_ffi_path")'

- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v3
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ iceberg_rust_ffi/integration_test
**/*.dylib
**/.claude
**/.DS_Store
Manifest.toml
Manifest.toml
LocalPreferences.toml
67 changes: 52 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: run-containers stop-containers build build-debug build-release test repl clean clean-all help
.PHONY: run-containers stop-containers build build-debug build-release test test-dev repl repl-dev set-local-lib clear-local-lib clean clean-all help

# Rust library configuration
RUST_FFI_DIR = iceberg_rust_ffi
Expand Down Expand Up @@ -41,24 +41,50 @@ build-debug:
build-release:
$(MAKE) BUILD_TYPE=release build

# Run tests (requires .env file)
test: build
# Helper target: Set local library preference for development
set-local-lib:
@julia --project=. -e 'using Libdl; using Preferences; lib_path = joinpath("$(shell pwd)/$(TARGET_DIR)", "libiceberg_rust_ffi." * Libdl.dlext); set_preferences!("iceberg_rust_ffi_jll", "libiceberg_rust_ffi_path" => lib_path; force=true); println("Set local library preference to: ", lib_path)'
@echo "Julia will use this path after restarting/recompiling."

# Helper target: Clear local library preference (use JLL default)
clear-local-lib:
@julia --project=. -e 'using Preferences; delete_preferences!("iceberg_rust_ffi_jll", "libiceberg_rust_ffi_path")'
@echo "Cleared local library preference. Julia will use JLL artifact after restarting/recompiling."

# Run tests with local build (for development)
test-dev: build set-local-lib
@if [ ! -f .env ]; then \
echo "Error: .env file not found. Please create a .env file with required environment variables."; \
exit 1; \
fi
@set -a && . ./.env && set +a && \
$(JULIA_THREADS_ENV) julia --project=. -e 'using Pkg; Pkg.test()'

# Run tests with JLL package (production mode)
test: clear-local-lib
@if [ ! -f .env ]; then \
echo "Error: .env file not found. Please create a .env file with required environment variables."; \
exit 1; \
fi
@set -a && . ./.env && set +a && \
export ICEBERG_RUST_LIB=$$(pwd)/$(TARGET_DIR) && \
$(JULIA_THREADS_ENV) julia --project=. -e 'Base.compilecache(Base.identify_package("RustyIceberg")); using Pkg; Pkg.test()'
$(JULIA_THREADS_ENV) julia --project=. -e 'using Pkg; Pkg.test()'

# Start Julia REPL with environment configured (requires .env file)
repl: build
# Start Julia REPL with local build (for development)
repl-dev: build set-local-lib
@if [ ! -f .env ]; then \
echo "Error: .env file not found. Please create a .env file with required environment variables."; \
exit 1; \
fi
@set -a && . ./.env && set +a && \
$(JULIA_THREADS_ENV) julia --project=.

# Start Julia REPL with JLL package (production mode)
repl: clear-local-lib
@if [ ! -f .env ]; then \
echo "Error: .env file not found. Please create a .env file with required environment variables."; \
exit 1; \
fi
@set -a && . ./.env && set +a && \
export ICEBERG_RUST_LIB=$$(pwd)/$(TARGET_DIR) && \
$(JULIA_THREADS_ENV) julia --project=.

# Clean build artifacts
Expand All @@ -76,17 +102,28 @@ help:
@echo " build - Build the Rust FFI library (use BUILD_TYPE=debug for debug build)"
@echo " build-debug - Build the Rust FFI library in debug mode"
@echo " build-release - Build the Rust FFI library in release mode"
@echo " test - Run Julia tests (requires .env file and runs build first)"
@echo " repl - Start Julia REPL with environment configured (requires .env file)"
@echo ""
@echo "Development targets (use local Rust build):"
@echo " test-dev - Run Julia tests with local Rust library"
@echo " repl-dev - Start Julia REPL with local Rust library"
@echo " set-local-lib - Set preference to use local Rust library"
@echo ""
@echo "Production targets (use JLL package):"
@echo " test - Run Julia tests with JLL package"
@echo " repl - Start Julia REPL with JLL package"
@echo " clear-local-lib - Clear local library preference (use JLL)"
@echo ""
@echo "Docker targets:"
@echo " run-containers - Start docker containers"
@echo " stop-containers - Stop docker containers"
@echo ""
@echo "Cleanup targets:"
@echo " clean - Clean build artifacts"
@echo " clean-all - Clean everything including target directory"
@echo " help - Show this help message"
@echo ""
@echo "Examples:"
@echo " make test - Build in debug mode and run tests"
@echo " make BUILD_TYPE=release test - Build in release mode and run tests"
@echo " make build-release repl - Build in release mode and start REPL"
@echo " JULIA_NUM_THREADS=8 make test - Run tests with 8 Julia threads"
@echo " JULIA_NUM_THREADS=4 make repl - Start REPL with 4 Julia threads"
@echo " make test-dev - Build and test with local Rust library"
@echo " make BUILD_TYPE=release test-dev - Build release and test with local library"
@echo " make test - Test with JLL package (production mode)"
@echo " JULIA_NUM_THREADS=8 make test-dev - Run dev tests with 8 Julia threads"
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ version = "0.3.3"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
iceberg_rust_ffi_jll = "6bd5c94f-693c-53e3-983d-a09fad412f22"

[compat]
Arrow = "2.0"
julia = "1.10"
Libdl = "1"
Preferences = "1.3"
iceberg_rust_ffi_jll = "0.3"

[extras]
Expand Down
108 changes: 67 additions & 41 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,50 +2,81 @@

[![CI](https://github.com/RelationalAI/RustyIceberg.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/RelationalAI/RustyIceberg.jl/actions/workflows/CI.yml)

A Julia package that provides bindings to the Iceberg C API, allowing you to read Apache Iceberg tables directly from Julia.
A Julia package that provides bindings on top of the Iceberg Rust crate, allowing you to read Apache Iceberg tables directly from Julia.

## Overview

This package wraps the iceberg_rust_ffi interface with Julia bindings, providing both low-level C API access and high-level Julia interfaces for working with Iceberg tables. It supports reading data from Iceberg tables and provides an iterator interface over Arrow format data.

## Features

- **Low-level Rust FFI bindings**: Direct access to all Iceberg Rust FFI API functions
- **High-level Julia interface**: Easy-to-use `read_iceberg_table()` function that returns an iterator
- **Arrow integration**: Seamless iteration over Arrow.Table objects
- **Dynamic library loading**: Automatic loading and unloading of the C library
- **Memory management**: Proper cleanup of C resources
- **Error handling**: Comprehensive error reporting and handling
- **Iterator-based API**: Streaming of data
This package wraps the iceberg_rust_ffi interface with Julia bindings, providing Julia interfaces for working with Iceberg tables. It supports reading data from Iceberg tables in full-scan and incremental-scan modes.

## Installation

1. Clone this repository:
```bash
git clone <repository-url>
```

2. Install the package in Julia:
1. Install the package in Julia:
```julia
using Pkg
Pkg.add(path=".")
Pkg.add("RustyIceberg")
```

3. Install dependencies:
2. Install dependencies:
```julia
Pkg.instantiate()
```

## Development

When working on RustyIceberg.jl, you can either use
[iceberg_rust_ffi_jll.jl](https://github.com/RelationalAI/iceberg_rust_ffi_jll.jl) or use
a local build of [iceberg_rust_ffi](https://github.com/RelationalAI/iceberg_rust_ffi).
When using a local build, set the environment variable `ICEBERG_RUST_LIB` to the directory
containing the build. For example, if you have the `iceberg_rust_ffi` repository at
`~/repos/iceberg_rust_ffi` and build the library by running `cargo build --release` from
the base of that repository, then you could use that local build by setting
ICEBERG_RUST_LIB="~/repos/iceberg_rust_ffi/target/release".
When working on RustyIceberg.jl, you can either use the precompiled
[iceberg_rust_ffi_jll.jl](https://github.com/JuliaBinaryWrappers/iceberg_rust_ffi_jll.jl) package
or use a local build of [iceberg_rust_ffi](./iceberg_rust_ffi/).

### Custom JLL
If you want to test custom code, you can refer directly to a branch of this repo (or of a fork). However, if the change involves the FFI, you might want to build a custom JLL so that downstream projects don't need to have the Rust toolchain in their repository. We do this by mimicking the structure of JuliaBinaryWrappers/iceberg_rust_ffi_jll.jl in https://github.com/RelationalAI/iceberg_rust_ffi_jll.jl/. All you have to do is invoke this [GitHub Action there](https://github.com/RelationalAI/iceberg_rust_ffi_jll.jl/actions/workflows/build-and-deploy.yml); follow the README in that repo for details.
This workflow will produce a new branch in that repo. Then, in your Julia project, simply refer to that repo with a repo-rev equal to the commit of the newly produced branch.

### Using Local Builds

To use a local Rust library build, set a preference using `Preferences.jl` with the **full path to the library file**:

```julia
using Libdl, Preferences
lib_path = joinpath(expanduser("~/repos/iceberg_rust_ffi/target/release"), "libiceberg_rust_ffi." * Libdl.dlext)
set_preferences!("iceberg_rust_ffi_jll", "libiceberg_rust_ffi_path" => lib_path; force=true)
```

The `Libdl.dlext` ensures the correct extension for your platform:
- macOS: `.dylib`
- Linux: `.so`
- Windows: `.dll`

**Note**: After setting preferences, you need to restart Julia or trigger package recompilation for changes to take effect.

### Makefile Targets

The project includes convenient Makefile targets for development:

**Development mode (uses local Rust build):**
- `make test-dev` - Build Rust library and run tests with local build
- `make repl-dev` - Build Rust library and start REPL with local build
- `make set-local-lib` - Set preference to use local Rust library

**Production mode (uses JLL package):**
- `make test` - Run tests with JLL package
- `make repl` - Start REPL with JLL package
- `make clear-local-lib` - Clear local library preference

**Examples:**
```bash
# Development workflow
make test-dev # Test with local debug build
make BUILD_TYPE=release test-dev # Test with local release build

# Switch back to JLL package
make test # Clears local preference and tests with JLL
```

To switch back to the JLL package, either run `make clear-local-lib` or manually delete the preference:

```julia
using Preferences
delete_preferences!("iceberg_rust_ffi_jll", "libiceberg_rust_ffi_path")
```

## Prerequisites

Expand All @@ -64,18 +95,13 @@ using Pkg
Pkg.test("RustyIceberg")
```

The tests replicate the functionality of the C integration test (`integration_test.c`) but using Julia bindings. Note that some tests may fail if S3 credentials are not configured or if the test data is not available.

## Troubleshooting

### Common Issues
## CI
CI runs with a custom iceberg_rust_ffi build, compiled from source. Releases run with the official JLL, which is the default. CI overrides the default using Preferences.jl.

1. **Library not found**: Ensure `libiceberg_rust_ffi.dylib` is in your library path
2. **S3 access denied**: Check your AWS credentials and permissions
3. **Memory errors**: Ensure you're not holding references to freed C objects
## Release

## Dependencies
### JLL Release
To make a JLL release, create a new PR in the JuliaPackaging/Yggdrasil repo, e.g. like [this one](https://github.com/JuliaPackaging/Yggdrasil/pull/12532/files).

- **Libdl**: For dynamic library loading
- **Arrow**: For Arrow format support
- **Test**: For testing (development dependency)
### RustyIceberg release
To create a new RustyIceberg release, simply bump the version in Project.toml, merge that into `main`, and then open that commit and comment on it like [here](https://github.com/RelationalAI/RustyIceberg.jl/commit/cbebb0e9611f70867e6ad2fbca0060a44345ae31#commitcomment-170551595). This will trigger an update in JuliaRegistries (should take ~20m), which will then invoke TagBot in this repository, which will also run CI tests with the official JLL.
20 changes: 4 additions & 16 deletions src/RustyIceberg.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
module RustyIceberg

using Base.Libc.Libdl: dlext
using Base: @kwdef, @lock
using Base.Threads: Atomic
using Libdl
using Arrow
using iceberg_rust_ffi_jll

Expand All @@ -17,20 +15,10 @@ export scan!, next_batch, free_batch, free_stream

const Option{T} = Union{T, Nothing}

const rust_lib = if haskey(ENV, "ICEBERG_RUST_LIB")
# For development, e.g. run `cargo build --release` and point to `target/release/` dir.
# Note this is set a precompilation time, as `ccall` needs this to be a `const`,
# so you need to restart Julia / recompile the package if you change it.
lib_path = realpath(joinpath(ENV["ICEBERG_RUST_LIB"], "libiceberg_rust_ffi.$(dlext)"))
@warn """
Using unreleased iceberg_rust_ffi library:
$(repr(replace(lib_path, homedir() => "~")))
This is only intended for local development and should not be used in production.
"""
lib_path
else
iceberg_rust_ffi_jll.libiceberg_rust_ffi
end
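# Always use the JLL library - override via Preferences if needed for local development.
# To use a local build, set the preference to the full path of the library file
# (not just the directory containing it):
#   using Libdl, Preferences
#   set_preferences!("iceberg_rust_ffi_jll",
#       "libiceberg_rust_ffi_path" => "/path/to/target/release/libiceberg_rust_ffi." * Libdl.dlext;
#       force=true)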
const rust_lib = iceberg_rust_ffi_jll.libiceberg_rust_ffi

"""
Runtime configuration for the Iceberg library.
Expand Down