Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ tokio = { version = "1.0", features = ["macros", "rt-multi-thread", "sync", "tim
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.9"
serde_json = "1.0"
uuid = { version = "1.0", features = ["v4"] }
uuid = { version = "1.0", features = ["v4", "v7"] }
log = "0.4"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
Expand All @@ -51,6 +51,7 @@ arrow = { version = "55", default-features = false }
arrow-array = "55"
arrow-ipc = "55"
arrow-schema = { version = "55", features = ["serde"] }
parquet = "55"
futures = "0.3"
serde_json_path = "0.7"
xxhash-rust = { version = "0.8", features = ["xxh3"] }
Expand Down Expand Up @@ -78,3 +79,6 @@ governor = "0.8.0"
default = ["incremental-cache", "python"]
incremental-cache = ["wasmtime/incremental-cache"]
python = []

[dev-dependencies]
tempfile = "3.27.0"
83 changes: 72 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,49 @@

APP_NAME := function-stream
VERSION := $(shell grep '^version' Cargo.toml | head -1 | awk -F '"' '{print $$2}')
ARCH := $(shell uname -m)
OS := $(shell uname -s | tr '[:upper:]' '[:lower:]')
DATE := $(shell date -u +"%Y-%m-%dT%H:%M:%SZ")

# 1. Auto-detect system environment & normalize architecture
RAW_ARCH := $(shell uname -m)
# Fix macOS M-series returning arm64 while Rust expects aarch64
ifeq ($(RAW_ARCH), arm64)
ARCH := aarch64
else ifeq ($(RAW_ARCH), amd64)
ARCH := x86_64
else
ARCH := $(RAW_ARCH)
endif

OS := $(shell uname -s | tr '[:upper:]' '[:lower:]')
OS_NAME := $(shell uname -s)

# 2. Configure RUSTFLAGS and target triple per platform
DIST_ROOT := dist
TARGET_DIR := target/release
ifeq ($(OS_NAME), Linux)
TRIPLE := $(ARCH)-unknown-linux-gnu
STATIC_FLAGS :=
else ifeq ($(OS_NAME), Darwin)
# macOS: strip symbols but keep dynamic linking (Apple system restriction)
TRIPLE := $(ARCH)-apple-darwin
STATIC_FLAGS :=
else ifneq (,$(findstring MINGW,$(OS_NAME))$(findstring MSYS,$(OS_NAME)))
# Windows (Git Bash / MSYS2): static-link MSVC runtime
TRIPLE := $(ARCH)-pc-windows-msvc
STATIC_FLAGS := -C target-feature=+crt-static
else
# Fallback
TRIPLE := $(ARCH)-unknown-linux-gnu
STATIC_FLAGS :=
endif

# 3. Aggressive optimization flags
# opt-level=z : size-oriented, minimize binary footprint
# strip=symbols: remove debug symbol table at link time
# Note: panic=abort is intentionally omitted to preserve stack unwinding
# for better fault tolerance in the streaming runtime
OPTIMIZE_FLAGS := -C opt-level=z -C strip=symbols $(STATIC_FLAGS)

TARGET_DIR := target/$(TRIPLE)/release
PYTHON_ROOT := python
WASM_SOURCE := $(PYTHON_ROOT)/functionstream-runtime/target/functionstream-python-runtime.wasm

Expand Down Expand Up @@ -67,18 +104,42 @@ help:
@echo ""
@echo " Version: $(VERSION) | Arch: $(ARCH) | OS: $(OS)"

build: .check-env .build-wasm
$(call log,BUILD,Rust Full Features)
@cargo build --release --features python --quiet
# 4. Auto-install missing Rust target toolchain
# Hidden helper target: queries `rustup target list --installed` for $(TRIPLE)
# and runs `rustup target add` when it is absent, so build targets work on a
# fresh checkout without manual setup.
# NOTE(review): assumes `rustup` is on PATH — fails if only a distro/system
# Rust toolchain (no rustup) is installed; confirm this is acceptable.
.ensure-target:
	@rustup target list --installed | grep -q "$(TRIPLE)" || \
	(printf "$(C_Y)[!] Auto-installing target toolchain for $(OS_NAME): $(TRIPLE)$(C_0)\n" && \
	rustup target add $(TRIPLE))

# 5. Build targets (depend on .ensure-target for automatic toolchain setup)
build: .check-env .ensure-target .build-wasm
$(call log,BUILD,Rust Full [$(OS_NAME) / $(TRIPLE)])
@RUSTFLAGS="$(OPTIMIZE_FLAGS)" \
cargo build --release \
--target $(TRIPLE) \
--features python \
--quiet
$(call log,BUILD,CLI)
@cargo build --release -p function-stream-cli --quiet
@RUSTFLAGS="$(OPTIMIZE_FLAGS)" \
cargo build --release \
--target $(TRIPLE) \
-p function-stream-cli \
--quiet
$(call success,Target: $(TARGET_DIR)/$(APP_NAME) $(TARGET_DIR)/cli)

build-lite: .check-env
$(call log,BUILD,Rust Lite No Python)
@cargo build --release --no-default-features --features incremental-cache --quiet
build-lite: .check-env .ensure-target
$(call log,BUILD,Rust Lite [$(OS_NAME) / $(TRIPLE)])
@RUSTFLAGS="$(OPTIMIZE_FLAGS)" \
cargo build --release \
--target $(TRIPLE) \
--no-default-features \
--features incremental-cache \
--quiet
$(call log,BUILD,CLI for dist)
@cargo build --release -p function-stream-cli --quiet
@RUSTFLAGS="$(OPTIMIZE_FLAGS)" \
cargo build --release \
--target $(TRIPLE) \
-p function-stream-cli \
--quiet
$(call success,Target: $(TARGET_DIR)/$(APP_NAME) $(TARGET_DIR)/cli)

.build-wasm:
Expand Down
14 changes: 14 additions & 0 deletions conf/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,20 @@ wasm:
# When cache exceeds this size, least recently used items will be evicted
max_cache_size: 104857600

# Streaming Runtime Configuration
streaming:
# Global memory pool for streaming pipeline execution (buffers, batch collect, backpressure).
# Default / example: 10 MiB (10485760 bytes).
streaming_runtime_memory_bytes: 10485760

# Per stateful operator (join / agg / window): in-memory state store cap before spill.
# Default / example: 5 MiB (5242880 bytes).
operator_state_store_memory_bytes: 5242880
checkpoint_interval_ms: 60000
pipeline_parallelism: 1
# KeyBy (key extraction) operator pipeline parallelism in planned streaming jobs.
key_by_parallelism: 1

# State Storage Configuration
# Used to store runtime state data for tasks
state_storage:
Expand Down
33 changes: 33 additions & 0 deletions protocol/proto/storage.proto
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,28 @@ message CatalogSourceTable {
// Streaming table storage (CREATE STREAMING TABLE persistence)
// =============================================================================

// Partition offset for one Kafka partition at a completed checkpoint.
message KafkaPartitionOffset {
// Kafka partition id within the source topic.
int32 partition = 1;
// Consumer offset recorded for this partition at the checkpoint.
// NOTE(review): whether this is "last consumed" or "next to read" is not
// visible here — confirm against the runtime coordinator before relying on it.
int64 offset = 2;
}

// Kafka source subtask checkpoint: one file / one TaskContext (pipeline + subtask).
message KafkaSourceSubtaskCheckpoint {
// Pipeline this source subtask belongs to.
uint32 pipeline_id = 1;
// Index of the subtask within the pipeline's source operator.
uint32 subtask_index = 2;
// Epoch of the barrier when this snapshot was taken (aligns with latest_checkpoint_epoch on commit).
uint64 checkpoint_epoch = 3;
// Offsets for every partition this subtask owns at the snapshot epoch.
repeated KafkaPartitionOffset partitions = 4;
}

// Generic source checkpoint payload envelope (enum-like via oneof).
// Additional source connector types add new variants to the oneof;
// existing field numbers must never be reused.
message SourceCheckpointPayload {
oneof checkpoint {
// Kafka is currently the only checkpointable source type.
KafkaSourceSubtaskCheckpoint kafka = 1;
}
}

// Persisted record for one streaming table (CREATE STREAMING TABLE).
// On restart, the engine re-submits each record to JobManager to resume the pipeline.
message StreamingTableDefinition {
Expand All @@ -52,6 +74,17 @@ message StreamingTableDefinition {
// Stored as opaque bytes to avoid coupling storage schema with runtime API protos.
bytes fs_program_bytes = 3;
string comment = 4;

uint64 checkpoint_interval_ms = 5;

// Last globally-committed checkpoint epoch.
// Updated by JobManager after all operators ACK. Used for crash recovery.
uint64 latest_checkpoint_epoch = 6;

// Kafka source per-subtask offsets at the same committed epoch as `latest_checkpoint_epoch`.
// Populated by the runtime coordinator from source checkpoint ACKs. Any `.bin` files under the
// job state dir are optional local materializations derived from this field, used only for
// local recovery; this field is the source of truth.
repeated KafkaSourceSubtaskCheckpoint kafka_source_checkpoints = 7;
}

// =============================================================================
Expand Down
26 changes: 23 additions & 3 deletions src/config/global_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,33 @@ use uuid::Uuid;
use crate::config::log_config::LogConfig;
use crate::config::python_config::PythonConfig;
use crate::config::service_config::ServiceConfig;
use crate::config::streaming_job::{ResolvedStreamingJobConfig, StreamingJobConfig};
use crate::config::wasm_config::WasmConfig;

/// Default for [`StreamingConfig::streaming_runtime_memory_bytes`] when unset. **10 MiB** (pipeline buffers, backpressure).
pub const DEFAULT_STREAMING_RUNTIME_MEMORY_BYTES: u64 = 10 * 1024 * 1024;

/// Default for [`StreamingConfig::operator_state_store_memory_bytes`] when unset. **5 MiB** per stateful operator cap.
pub const DEFAULT_OPERATOR_STATE_STORE_MEMORY_BYTES: u64 = 5 * 1024 * 1024;

#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StreamingConfig {
/// Maximum heap memory (in bytes) available to the streaming runtime's memory pool.
/// Defaults to 256 MiB when absent.
pub max_memory_bytes: Option<usize>,
#[serde(flatten)]
pub job: StreamingJobConfig,
/// Bytes reserved in the global memory pool for streaming pipeline execution (buffers,
/// batch collect, backpressure). Default 10 MiB.
#[serde(default)]
pub streaming_runtime_memory_bytes: Option<u64>,
/// Per stateful operator: in-memory state store cap before spill. Default 5 MiB.
#[serde(default)]
pub operator_state_store_memory_bytes: Option<u64>,
}

impl StreamingConfig {
/// Resolves the flattened job-level settings (`self.job`) into concrete
/// values, substituting defaults for any unset fields via
/// [`StreamingJobConfig::resolve`].
#[inline]
pub fn resolved_job(&self) -> ResolvedStreamingJobConfig {
self.job.resolve()
}
}

#[derive(Debug, Clone, Serialize, Deserialize, Default)]
Expand Down
7 changes: 6 additions & 1 deletion src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,13 @@ pub mod paths;
pub mod python_config;
pub mod service_config;
pub mod storage;
pub mod streaming_job;
pub mod system;
pub mod wasm_config;

pub use global_config::GlobalConfig;
pub use global_config::{
DEFAULT_OPERATOR_STATE_STORE_MEMORY_BYTES, DEFAULT_STREAMING_RUNTIME_MEMORY_BYTES, GlobalConfig,
};
pub use loader::load_global_config;
pub use log_config::LogConfig;
#[allow(unused_imports)]
Expand All @@ -31,3 +35,4 @@ pub use paths::{
};
#[cfg(feature = "python")]
pub use python_config::PythonConfig;
pub use streaming_job::{DEFAULT_CHECKPOINT_INTERVAL_MS, DEFAULT_PIPELINE_PARALLELISM};
72 changes: 72 additions & 0 deletions src/config/streaming_job.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::{Deserialize, Serialize};

pub const DEFAULT_CHECKPOINT_INTERVAL_MS: u64 = 60 * 1000;
pub const DEFAULT_PIPELINE_PARALLELISM: u32 = 1;
pub const DEFAULT_KEY_BY_PARALLELISM: u32 = 1;
pub const DEFAULT_JOB_MANAGER_CONTROL_PLANE_THREADS: u32 = 1;

/// User-facing streaming job settings as deserialized from configuration.
/// Every field is optional; `resolve()` maps `None` (and, for each field,
/// a zero value) to the corresponding `DEFAULT_*` constant.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StreamingJobConfig {
/// Interval between checkpoints in milliseconds; defaults to `DEFAULT_CHECKPOINT_INTERVAL_MS`.
#[serde(default)]
pub checkpoint_interval_ms: Option<u64>,
/// Parallelism for regular pipeline operators; defaults to `DEFAULT_PIPELINE_PARALLELISM`.
#[serde(default)]
pub pipeline_parallelism: Option<u32>,
/// Physical parallelism for KeyBy / key-extraction operators in planned streaming graphs.
#[serde(default)]
pub key_by_parallelism: Option<u32>,
/// Threads for the JobManager control plane; defaults to `DEFAULT_JOB_MANAGER_CONTROL_PLANE_THREADS`.
#[serde(default)]
pub job_manager_control_plane_threads: Option<u32>,
/// Threads for the JobManager data plane; defaults to the host CPU count (or 1 if undetectable).
#[serde(default)]
pub job_manager_data_plane_threads: Option<u32>,
}

/// Fully-resolved counterpart of [`StreamingJobConfig`]: every optional knob
/// replaced by a concrete, positive value (see `StreamingJobConfig::resolve`).
#[derive(Debug, Clone, Copy)]
pub struct ResolvedStreamingJobConfig {
/// Interval between checkpoints, in milliseconds (always > 0).
pub checkpoint_interval_ms: u64,
/// Parallelism for regular pipeline operators (always > 0).
pub pipeline_parallelism: u32,
/// Parallelism for KeyBy / key-extraction operators (always > 0).
pub key_by_parallelism: u32,
/// JobManager control-plane thread count (always > 0).
pub job_manager_control_plane_threads: u32,
/// JobManager data-plane thread count (always > 0).
pub job_manager_data_plane_threads: u32,
}

impl StreamingJobConfig {
    /// Resolves every optional setting into a concrete value.
    ///
    /// A field that is `None` — or explicitly set to 0, which would be
    /// meaningless for an interval or a parallelism degree — falls back to its
    /// default. The data-plane thread count uniquely defaults to the number of
    /// CPUs reported by the host (1 when detection fails).
    pub fn resolve(&self) -> ResolvedStreamingJobConfig {
        // Shared fallback rule for the u32 knobs: keep only strictly positive
        // user-supplied values, otherwise use the provided default.
        fn positive_or(value: Option<u32>, default: u32) -> u32 {
            match value {
                Some(v) if v > 0 => v,
                _ => default,
            }
        }

        // Host CPU count is the default for the data plane only.
        let detected_cpus = match std::thread::available_parallelism() {
            Ok(n) => n.get() as u32,
            Err(_) => 1,
        };

        let checkpoint_interval_ms = match self.checkpoint_interval_ms {
            Some(ms) if ms > 0 => ms,
            _ => DEFAULT_CHECKPOINT_INTERVAL_MS,
        };

        ResolvedStreamingJobConfig {
            checkpoint_interval_ms,
            pipeline_parallelism: positive_or(
                self.pipeline_parallelism,
                DEFAULT_PIPELINE_PARALLELISM,
            ),
            key_by_parallelism: positive_or(self.key_by_parallelism, DEFAULT_KEY_BY_PARALLELISM),
            job_manager_control_plane_threads: positive_or(
                self.job_manager_control_plane_threads,
                DEFAULT_JOB_MANAGER_CONTROL_PLANE_THREADS,
            ),
            job_manager_data_plane_threads: positive_or(
                self.job_manager_data_plane_threads,
                detected_cpus,
            ),
        }
    }
}
Loading
Loading