Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ tokio = { version = "1.0", features = ["macros", "rt-multi-thread", "sync", "tim
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.9"
serde_json = "1.0"
uuid = { version = "1.0", features = ["v4"] }
uuid = { version = "1.0", features = ["v4", "v7"] }
log = "0.4"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
Expand All @@ -51,6 +51,7 @@ arrow = { version = "55", default-features = false }
arrow-array = "55"
arrow-ipc = "55"
arrow-schema = { version = "55", features = ["serde"] }
parquet = "55"
futures = "0.3"
serde_json_path = "0.7"
xxhash-rust = { version = "0.8", features = ["xxh3"] }
Expand Down Expand Up @@ -78,3 +79,6 @@ governor = "0.8.0"
default = ["incremental-cache", "python"]
incremental-cache = ["wasmtime/incremental-cache"]
python = []

[dev-dependencies]
tempfile = "3.27.0"
83 changes: 72 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,49 @@

APP_NAME := function-stream
VERSION := $(shell grep '^version' Cargo.toml | head -1 | awk -F '"' '{print $$2}')
ARCH := $(shell uname -m)
OS := $(shell uname -s | tr '[:upper:]' '[:lower:]')
DATE := $(shell date -u +"%Y-%m-%dT%H:%M:%SZ")

# 1. Auto-detect system environment & normalize architecture
RAW_ARCH := $(shell uname -m)
# Fix macOS M-series returning arm64 while Rust expects aarch64
ifeq ($(RAW_ARCH), arm64)
ARCH := aarch64
else ifeq ($(RAW_ARCH), amd64)
ARCH := x86_64
else
ARCH := $(RAW_ARCH)
endif

OS := $(shell uname -s | tr '[:upper:]' '[:lower:]')
OS_NAME := $(shell uname -s)

# 2. Configure RUSTFLAGS and target triple per platform
DIST_ROOT := dist
TARGET_DIR := target/release
ifeq ($(OS_NAME), Linux)
TRIPLE := $(ARCH)-unknown-linux-gnu
STATIC_FLAGS :=
else ifeq ($(OS_NAME), Darwin)
# macOS: strip symbols but keep dynamic linking (Apple system restriction)
TRIPLE := $(ARCH)-apple-darwin
STATIC_FLAGS :=
else ifneq (,$(findstring MINGW,$(OS_NAME))$(findstring MSYS,$(OS_NAME)))
# Windows (Git Bash / MSYS2): static-link MSVC runtime
TRIPLE := $(ARCH)-pc-windows-msvc
STATIC_FLAGS := -C target-feature=+crt-static
else
# Fallback
TRIPLE := $(ARCH)-unknown-linux-gnu
STATIC_FLAGS :=
endif

# 3. Aggressive optimization flags
# opt-level=z : size-oriented, minimize binary footprint
# strip=symbols: remove debug symbol table at link time
# Note: panic=abort is intentionally omitted to preserve stack unwinding
# for better fault tolerance in the streaming runtime
OPTIMIZE_FLAGS := -C opt-level=z -C strip=symbols $(STATIC_FLAGS)

TARGET_DIR := target/$(TRIPLE)/release
PYTHON_ROOT := python
WASM_SOURCE := $(PYTHON_ROOT)/functionstream-runtime/target/functionstream-python-runtime.wasm

Expand Down Expand Up @@ -67,18 +104,42 @@ help:
@echo ""
@echo " Version: $(VERSION) | Arch: $(ARCH) | OS: $(OS)"

build: .check-env .build-wasm
$(call log,BUILD,Rust Full Features)
@cargo build --release --features python --quiet
# 4. Auto-install missing Rust target toolchain
# Hidden helper target: queries `rustup target list --installed` for $(TRIPLE)
# and runs `rustup target add` when it is absent, so build targets work on a
# fresh checkout without manual setup.
# NOTE(review): assumes `rustup` is on PATH — fails if only a distro/system
# Rust toolchain (no rustup) is installed; confirm this is acceptable.
.ensure-target:
	@rustup target list --installed | grep -q "$(TRIPLE)" || \
	(printf "$(C_Y)[!] Auto-installing target toolchain for $(OS_NAME): $(TRIPLE)$(C_0)\n" && \
	rustup target add $(TRIPLE))

# 5. Build targets (depend on .ensure-target for automatic toolchain setup)
build: .check-env .ensure-target .build-wasm
$(call log,BUILD,Rust Full [$(OS_NAME) / $(TRIPLE)])
@RUSTFLAGS="$(OPTIMIZE_FLAGS)" \
cargo build --release \
--target $(TRIPLE) \
--features python \
--quiet
$(call log,BUILD,CLI)
@cargo build --release -p function-stream-cli --quiet
@RUSTFLAGS="$(OPTIMIZE_FLAGS)" \
cargo build --release \
--target $(TRIPLE) \
-p function-stream-cli \
--quiet
$(call success,Target: $(TARGET_DIR)/$(APP_NAME) $(TARGET_DIR)/cli)

build-lite: .check-env
$(call log,BUILD,Rust Lite No Python)
@cargo build --release --no-default-features --features incremental-cache --quiet
build-lite: .check-env .ensure-target
$(call log,BUILD,Rust Lite [$(OS_NAME) / $(TRIPLE)])
@RUSTFLAGS="$(OPTIMIZE_FLAGS)" \
cargo build --release \
--target $(TRIPLE) \
--no-default-features \
--features incremental-cache \
--quiet
$(call log,BUILD,CLI for dist)
@cargo build --release -p function-stream-cli --quiet
@RUSTFLAGS="$(OPTIMIZE_FLAGS)" \
cargo build --release \
--target $(TRIPLE) \
-p function-stream-cli \
--quiet
$(call success,Target: $(TARGET_DIR)/$(APP_NAME) $(TARGET_DIR)/cli)

.build-wasm:
Expand Down
14 changes: 14 additions & 0 deletions conf/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,20 @@ wasm:
# When cache exceeds this size, least recently used items will be evicted
max_cache_size: 104857600

# Streaming Runtime Configuration
streaming:
# Global memory pool for streaming pipeline execution (buffers, batch collect, backpressure).
# Default / example: 10 MiB (10485760 bytes).
streaming_runtime_memory_bytes: 10485760

# Per stateful operator (join / agg / window): in-memory state store cap before spill.
# Default / example: 5 MiB (5242880 bytes).
operator_state_store_memory_bytes: 5242880
checkpoint_interval_ms: 60000
pipeline_parallelism: 1
# KeyBy (key extraction) operator pipeline parallelism in planned streaming jobs.
key_by_parallelism: 1

# State Storage Configuration
# Used to store runtime state data for tasks
state_storage:
Expand Down
33 changes: 33 additions & 0 deletions protocol/proto/storage.proto
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,28 @@ message CatalogSourceTable {
// Streaming table storage (CREATE STREAMING TABLE persistence)
// =============================================================================

// Partition offset for one Kafka partition at a completed checkpoint.
message KafkaPartitionOffset {
// Kafka partition id within the source topic.
int32 partition = 1;
// Consumer offset recorded for this partition at the checkpoint.
// NOTE(review): whether this is "last consumed" or "next to read" is not
// visible here — confirm against the runtime coordinator before relying on it.
int64 offset = 2;
}

// Kafka source subtask checkpoint: one file / one TaskContext (pipeline + subtask).
message KafkaSourceSubtaskCheckpoint {
// Pipeline this source subtask belongs to.
uint32 pipeline_id = 1;
// Index of the subtask within the pipeline's source operator.
uint32 subtask_index = 2;
// Epoch of the barrier when this snapshot was taken (aligns with latest_checkpoint_epoch on commit).
uint64 checkpoint_epoch = 3;
// Offsets for every partition this subtask owns at the snapshot epoch.
repeated KafkaPartitionOffset partitions = 4;
}

// Generic source checkpoint payload envelope (enum-like via oneof).
// Additional source connector types add new variants to the oneof;
// existing field numbers must never be reused.
message SourceCheckpointPayload {
oneof checkpoint {
// Kafka is currently the only checkpointable source type.
KafkaSourceSubtaskCheckpoint kafka = 1;
}
}

// Persisted record for one streaming table (CREATE STREAMING TABLE).
// On restart, the engine re-submits each record to JobManager to resume the pipeline.
message StreamingTableDefinition {
Expand All @@ -52,6 +74,17 @@ message StreamingTableDefinition {
// Stored as opaque bytes to avoid coupling storage schema with runtime API protos.
bytes fs_program_bytes = 3;
string comment = 4;

uint64 checkpoint_interval_ms = 5;

// Last globally-committed checkpoint epoch.
// Updated by JobManager after all operators ACK. Used for crash recovery.
uint64 latest_checkpoint_epoch = 6;

// Kafka source per-subtask offsets at the same committed epoch as `latest_checkpoint_epoch`.
// Populated by the runtime coordinator from source checkpoint ACKs. Any `.bin` files under the
// job state dir are optional local materializations derived from this field, used only for
// local recovery; this field is the source of truth.
repeated KafkaSourceSubtaskCheckpoint kafka_source_checkpoints = 7;
}

// =============================================================================
Expand Down
26 changes: 23 additions & 3 deletions src/config/global_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,33 @@ use uuid::Uuid;
use crate::config::log_config::LogConfig;
use crate::config::python_config::PythonConfig;
use crate::config::service_config::ServiceConfig;
use crate::config::streaming_job::{ResolvedStreamingJobConfig, StreamingJobConfig};
use crate::config::wasm_config::WasmConfig;

/// Default for [`StreamingConfig::streaming_runtime_memory_bytes`] when unset. **10 MiB** (pipeline buffers, backpressure).
pub const DEFAULT_STREAMING_RUNTIME_MEMORY_BYTES: u64 = 10 * 1024 * 1024;

/// Default for [`StreamingConfig::operator_state_store_memory_bytes`] when unset. **5 MiB** per stateful operator cap.
pub const DEFAULT_OPERATOR_STATE_STORE_MEMORY_BYTES: u64 = 5 * 1024 * 1024;

#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StreamingConfig {
/// Maximum heap memory (in bytes) available to the streaming runtime's memory pool.
/// Defaults to 256 MiB when absent.
pub max_memory_bytes: Option<usize>,
#[serde(flatten)]
pub job: StreamingJobConfig,
/// Bytes reserved in the global memory pool for streaming pipeline execution (buffers,
/// batch collect, backpressure). Default 10 MiB.
#[serde(default)]
pub streaming_runtime_memory_bytes: Option<u64>,
/// Per stateful operator: in-memory state store cap before spill. Default 5 MiB.
#[serde(default)]
pub operator_state_store_memory_bytes: Option<u64>,
}

impl StreamingConfig {
/// Resolves the flattened job-level settings (`self.job`) into concrete
/// values, substituting defaults for any unset fields via
/// [`StreamingJobConfig::resolve`].
#[inline]
pub fn resolved_job(&self) -> ResolvedStreamingJobConfig {
self.job.resolve()
}
}

#[derive(Debug, Clone, Serialize, Deserialize, Default)]
Expand Down
7 changes: 6 additions & 1 deletion src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,13 @@ pub mod paths;
pub mod python_config;
pub mod service_config;
pub mod storage;
pub mod streaming_job;
pub mod system;
pub mod wasm_config;

pub use global_config::GlobalConfig;
pub use global_config::{
DEFAULT_OPERATOR_STATE_STORE_MEMORY_BYTES, DEFAULT_STREAMING_RUNTIME_MEMORY_BYTES, GlobalConfig,
};
pub use loader::load_global_config;
pub use log_config::LogConfig;
#[allow(unused_imports)]
Expand All @@ -31,3 +35,4 @@ pub use paths::{
};
#[cfg(feature = "python")]
pub use python_config::PythonConfig;
pub use streaming_job::{DEFAULT_CHECKPOINT_INTERVAL_MS, DEFAULT_PIPELINE_PARALLELISM};
72 changes: 72 additions & 0 deletions src/config/streaming_job.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::{Deserialize, Serialize};

pub const DEFAULT_CHECKPOINT_INTERVAL_MS: u64 = 60 * 1000;
pub const DEFAULT_PIPELINE_PARALLELISM: u32 = 1;
pub const DEFAULT_KEY_BY_PARALLELISM: u32 = 1;
pub const DEFAULT_JOB_MANAGER_CONTROL_PLANE_THREADS: u32 = 1;

/// User-facing streaming job settings as deserialized from configuration.
/// Every field is optional; `resolve()` maps `None` (and, for each field,
/// a zero value) to the corresponding `DEFAULT_*` constant.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StreamingJobConfig {
/// Interval between checkpoints in milliseconds; defaults to `DEFAULT_CHECKPOINT_INTERVAL_MS`.
#[serde(default)]
pub checkpoint_interval_ms: Option<u64>,
/// Parallelism for regular pipeline operators; defaults to `DEFAULT_PIPELINE_PARALLELISM`.
#[serde(default)]
pub pipeline_parallelism: Option<u32>,
/// Physical parallelism for KeyBy / key-extraction operators in planned streaming graphs.
#[serde(default)]
pub key_by_parallelism: Option<u32>,
/// Threads for the JobManager control plane; defaults to `DEFAULT_JOB_MANAGER_CONTROL_PLANE_THREADS`.
#[serde(default)]
pub job_manager_control_plane_threads: Option<u32>,
/// Threads for the JobManager data plane; defaults to the host CPU count (or 1 if undetectable).
#[serde(default)]
pub job_manager_data_plane_threads: Option<u32>,
}

/// Fully-resolved counterpart of [`StreamingJobConfig`]: every optional knob
/// replaced by a concrete, positive value (see `StreamingJobConfig::resolve`).
#[derive(Debug, Clone, Copy)]
pub struct ResolvedStreamingJobConfig {
/// Interval between checkpoints, in milliseconds (always > 0).
pub checkpoint_interval_ms: u64,
/// Parallelism for regular pipeline operators (always > 0).
pub pipeline_parallelism: u32,
/// Parallelism for KeyBy / key-extraction operators (always > 0).
pub key_by_parallelism: u32,
/// JobManager control-plane thread count (always > 0).
pub job_manager_control_plane_threads: u32,
/// JobManager data-plane thread count (always > 0).
pub job_manager_data_plane_threads: u32,
}

impl StreamingJobConfig {
    /// Resolves every optional setting into a concrete value.
    ///
    /// A field that is `None` — or explicitly set to 0, which would be
    /// meaningless for an interval or a parallelism degree — falls back to its
    /// default. The data-plane thread count uniquely defaults to the number of
    /// CPUs reported by the host (1 when detection fails).
    pub fn resolve(&self) -> ResolvedStreamingJobConfig {
        // Shared fallback rule for the u32 knobs: keep only strictly positive
        // user-supplied values, otherwise use the provided default.
        fn positive_or(value: Option<u32>, default: u32) -> u32 {
            match value {
                Some(v) if v > 0 => v,
                _ => default,
            }
        }

        // Host CPU count is the default for the data plane only.
        let detected_cpus = match std::thread::available_parallelism() {
            Ok(n) => n.get() as u32,
            Err(_) => 1,
        };

        let checkpoint_interval_ms = match self.checkpoint_interval_ms {
            Some(ms) if ms > 0 => ms,
            _ => DEFAULT_CHECKPOINT_INTERVAL_MS,
        };

        ResolvedStreamingJobConfig {
            checkpoint_interval_ms,
            pipeline_parallelism: positive_or(
                self.pipeline_parallelism,
                DEFAULT_PIPELINE_PARALLELISM,
            ),
            key_by_parallelism: positive_or(self.key_by_parallelism, DEFAULT_KEY_BY_PARALLELISM),
            job_manager_control_plane_threads: positive_or(
                self.job_manager_control_plane_threads,
                DEFAULT_JOB_MANAGER_CONTROL_PLANE_THREADS,
            ),
            job_manager_data_plane_threads: positive_or(
                self.job_manager_data_plane_threads,
                detected_cpus,
            ),
        }
    }
}
Loading
Loading