Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
386f849
ore: add pager feature flag
antiguru May 4, 2026
fb9edde
ore: skeleton mz_ore::pager module with Backend enum
antiguru May 4, 2026
c3d9e20
ore: pager scratch dir lifecycle and stale-subdir reaper
antiguru May 4, 2026
fa1ea53
ore: pager Handle type and inner storage scaffolding
antiguru May 4, 2026
96f1fb3
ore: pager swap backend pageout with MADV_COLD
antiguru May 4, 2026
a5d992c
ore: pager swap backend read_at_many
antiguru May 4, 2026
991a5e7
ore: pager swap backend take with zero-copy fast path
antiguru May 4, 2026
22c3cba
ore: pager public dispatch surface (pageout/read_at/take)
antiguru May 4, 2026
b0d744f
ore: pager file backend pageout with pwritev
antiguru May 4, 2026
e946d4f
ore: pager file backend read_at_many with coalescing
antiguru May 4, 2026
b4a90fa
ore: pager file backend take and drop reclaim
antiguru May 4, 2026
d885439
ore: pager cross-backend integration tests
antiguru May 4, 2026
f69df0a
ore: pager Criterion bench harness
antiguru May 4, 2026
dff9f9b
ore: pager clippy + lint cleanups (write_vectored, cast_from, exhaust…
antiguru May 4, 2026
d2bbde6
ore: pager copyright headers and test-attribute lint compliance
antiguru May 4, 2026
1a709da
ore: pager bench round-trip with touch-every-page readback
antiguru May 4, 2026
18eadea
ore: pager merge-batcher example with cache-line touch
antiguru May 4, 2026
5520d23
ore: update Cargo.lock for pager tempfile dev-dep
antiguru May 4, 2026
34778b8
ore: pager prefetch and prefetch_at hints
antiguru May 5, 2026
f4244a5
ore: replace as_conversions with cast_from/cast_lossy/try_from
antiguru May 5, 2026
ae4bb83
ore: pager merge example takes --threads, partitions chain
antiguru May 5, 2026
f2c6806
doc: pager design — add operational characteristics with measured thr…
antiguru May 5, 2026
879ffbe
ore: gate pager_merge example on the pager feature
antiguru May 5, 2026
cf4714f
ore: drop pager prefetch API and example usage
antiguru May 5, 2026
f853ba5
doc: pager design — add r8gd.16xlarge bench, retract swap-caps-regard…
antiguru May 5, 2026
bb679b5
ore: pageout_with helper for explicit-backend dispatch
antiguru May 14, 2026
307d604
timely-util: column_pager with policy + lz4
antiguru May 14, 2026
0d4993a
timely-util: tiered paging policy + drop-based release
antiguru May 14, 2026
ef63b8b
timely-util: criterion bench for column_pager
antiguru May 14, 2026
64745fa
timely-util: relabel swap-backend bench as swap-warm
antiguru May 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,7 @@ launchdarkly-server-sdk = { version = "2.6.2", default-features = false }
lgalloc = "0.6.0"
libc = "0.2.184"
lru = "0.16.3"
lz4_flex = { version = "0.12.1", default-features = false, features = ["frame", "std"] }
maplit = "1.0.2"
mappings = "0.7.2"
md-5 = "0.10.6"
Expand Down
411 changes: 411 additions & 0 deletions doc/developer/design/20260504_pager.md

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions src/ore/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ mz-ore = { path = "../ore", features = ["id_gen", "chrono"] }
proptest.workspace = true
scopeguard.workspace = true
serde_json.workspace = true
tempfile.workspace = true
tokio.workspace = true
tokio-test.workspace = true
tracing-subscriber.workspace = true
Expand Down Expand Up @@ -145,6 +146,7 @@ assert-no-tracing = []
assert = ["assert-no-tracing", "ctor", "tracing"]
proptest = ["dep:proptest", "proptest-derive"]
overflowing = ["assert"]
pager = ["dep:bytemuck", "libc", "rand", "dep:tracing"]

[[test]]
name = "future"
Expand All @@ -167,6 +169,15 @@ name = "bytes"
harness = false
required-features = ["bytes", "region", "tracing"]

[[bench]]
name = "pager"
harness = false
required-features = ["pager"]

[[example]]
name = "pager_merge"
required-features = ["pager"]

[package.metadata.cargo-udeps.ignore]
# Only used in doc-tests.
development = ["tokio-test"]
141 changes: 141 additions & 0 deletions src/ore/benches/pager.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file at the
// root of this repository, or online at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![cfg(feature = "pager")]

use std::hint::black_box;
use std::path::PathBuf;
use std::time::{Duration, Instant};

use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use mz_ore::cast::CastFrom;
use mz_ore::pager::{self, Backend, Handle};

/// Page size, in bytes, assumed by this benchmark when striding through a
/// buffer (see `touch_every_page`).
// NOTE(review): hard-coded to 4 KiB; confirm this matches the page size of the
// hosts the benchmark is run on.
const PAGE_BYTES: usize = 4096;
/// Number of `u64` elements (8 bytes each) that make up one `PAGE_BYTES` page.
const PAGE_U64S: usize = PAGE_BYTES / 8;

/// Configures the pager scratch directory exactly once per process.
///
/// The directory is read from the `MZ_PAGER_SCRATCH` environment variable; when
/// the variable is unset, the platform temporary directory is used instead.
/// Subsequent calls are no-ops.
fn ensure_scratch() {
    static SCRATCH_ONCE: std::sync::Once = std::sync::Once::new();
    SCRATCH_ONCE.call_once(|| {
        let scratch = match std::env::var_os("MZ_PAGER_SCRATCH") {
            Some(raw) => PathBuf::from(raw),
            None => std::env::temp_dir(),
        };
        pager::set_scratch_dir(scratch);
    });
}

/// Builds a payload of `len_u64s` elements holding the ascending sequence
/// `0, 1, 2, ..., len_u64s - 1`.
fn fill_payload(len_u64s: usize) -> Vec<u64> {
    let end = u64::cast_from(len_u64s);
    (0..end).collect::<Vec<u64>>()
}

/// Loads the first `u64` of every `PAGE_U64S`-element stride of `buf`
/// (indices `0`, `PAGE_U64S`, `2 * PAGE_U64S`, ...), so that each page backing
/// the buffer is read at least once.
///
/// Returns the wrapping sum of the loaded values; callers feed it to
/// `black_box` so the loads cannot be optimized away. An empty buffer yields 0.
fn touch_every_page(buf: &[u64]) -> u64 {
    buf.iter()
        .step_by(PAGE_U64S)
        .fold(0u64, |acc, &word| acc.wrapping_add(word))
}

/// Benchmarks a page-out / take round trip of a single-chunk payload, reading
/// one word from every page of the data that comes back.
///
/// Runs once per payload size (4 KiB to 16 MiB) and per backend (`Swap`,
/// `File`). The buffer returned by `pager::take` becomes the payload of the
/// next iteration, so the payload is only filled once, before timing starts.
///
/// NOTE(review): the replacement readback `Vec` is created with
/// `Vec::with_capacity` inside the timed loop, so one allocation per iteration
/// is part of the measurement.
fn round_trip_single(c: &mut Criterion) {
    ensure_scratch();
    let mut group = c.benchmark_group("pager/round_trip_touch/single");
    group.measurement_time(Duration::from_secs(5));
    for size_kib in [4usize, 64, 1024, 2048, 16384] {
        let payload_bytes = size_kib * 1024;
        let len = payload_bytes / 8;
        for backend in [Backend::Swap, Backend::File] {
            // The backend is process-global state; select it before registering
            // the benchmark that exercises it.
            pager::set_backend(backend);
            group.throughput(Throughput::Bytes(u64::cast_from(payload_bytes)));
            let id = BenchmarkId::new(format!("{backend:?}"), size_kib);
            group.bench_function(id, |bencher| {
                bencher.iter_custom(|iters| {
                    let mut payload = fill_payload(len);
                    let mut readback: Vec<u64> = Vec::with_capacity(len);
                    let start = Instant::now();
                    for _ in 0..iters {
                        let mut chunks = [std::mem::take(&mut payload)];
                        let handle: Handle = pager::pageout(&mut chunks);
                        pager::take(handle, &mut readback);
                        black_box(touch_every_page(&readback));
                        // Recycle the data we just read as the next payload and
                        // leave an empty, pre-sized buffer for the next take.
                        payload = std::mem::replace(&mut readback, Vec::with_capacity(len));
                    }
                    start.elapsed()
                });
            });
        }
    }
    group.finish();
}

/// Benchmarks a page-out / take round trip of one logical 2 MiB block that is
/// handed to the pager as `chunk_count` equally sized chunks (1, 2, 8 or 64),
/// reading one word from every page of the consolidated readback.
///
/// Runs once per chunk count and per backend (`Swap`, `File`).
///
/// NOTE(review): the code that re-splits the readback buffer into chunks for
/// the next iteration runs between `Instant::now()` and `elapsed()`, so its
/// allocations and copies are included in the reported time.
fn round_trip_scatter_2mib(c: &mut Criterion) {
    ensure_scratch();
    let mut group = c.benchmark_group("pager/round_trip_touch/scatter_2MiB");
    group.measurement_time(Duration::from_secs(5));
    let total_bytes: usize = 2 * 1024 * 1024;
    let total_u64s = total_bytes / 8;
    for chunk_count in [1usize, 2, 8, 64] {
        let chunk_len_u64s = total_bytes / chunk_count / 8;
        for backend in [Backend::Swap, Backend::File] {
            // The backend is process-global state; select it before registering
            // the benchmark that exercises it.
            pager::set_backend(backend);
            group.throughput(Throughput::Bytes(u64::cast_from(total_bytes)));
            let id = BenchmarkId::new(format!("{backend:?}"), chunk_count);
            group.bench_function(id, |bencher| {
                bencher.iter_custom(|iters| {
                    let mut payload: Vec<Vec<u64>> =
                        std::iter::repeat_with(|| fill_payload(chunk_len_u64s))
                            .take(chunk_count)
                            .collect();
                    let mut readback: Vec<u64> = Vec::with_capacity(total_u64s);
                    let start = Instant::now();
                    for _ in 0..iters {
                        let handle: Handle = pager::pageout(payload.as_mut_slice());
                        pager::take(handle, &mut readback);
                        black_box(touch_every_page(&readback));
                        // Rebuild the scatter input from the consolidated
                        // readback: take ownership of it, leaving a fresh
                        // pre-sized buffer behind for the next take.
                        let mut head =
                            std::mem::replace(&mut readback, Vec::with_capacity(total_u64s));
                        // Peel `chunk_len_u64s` elements off the end of `head`
                        // for each trailing chunk. The `min` keeps the
                        // subtraction from underflowing if fewer elements came
                        // back than expected. With a single chunk this loop
                        // does nothing.
                        for slot in payload.iter_mut().skip(1) {
                            let take_len = chunk_len_u64s.min(head.len());
                            *slot = head.split_off(head.len() - take_len);
                        }
                        // Whatever is left at the front becomes chunk 0.
                        payload[0] = head;
                    }
                    start.elapsed()
                });
            });
        }
    }
    group.finish();
}

// Register both round-trip benchmarks in the `benches` group and generate the
// benchmark binary's entry point (the bench target sets `harness = false`).
criterion_group!(benches, round_trip_single, round_trip_scatter_2mib);
criterion_main!(benches);
Loading
Loading