From 762ca75e5c2a33c80e388b4817d5320510ab3c68 Mon Sep 17 00:00:00 2001 From: Tudor Golubenco Date: Tue, 19 May 2026 09:47:12 +0200 Subject: [PATCH 1/3] Add pg_deltax pg_deltax is Xata's time-series PostgreSQL extension that adds columnar storage and compression to a partitioned hits table. Implements the per-system script interface (install/start/check/stop/load/query/ data-size + benchmark.sh shim) so the shared driver in lib/ runs it end-to-end. Includes a c6a.4xlarge result run from the new methodology (true-cold cycles + concurrent QPS). --- pg_deltax/benchmark.sh | 5 + pg_deltax/check | 4 + pg_deltax/create.sql | 108 ++++++++++++++++++++ pg_deltax/data-size | 8 ++ pg_deltax/install | 74 ++++++++++++++ pg_deltax/load | 46 +++++++++ pg_deltax/queries.sql | 43 ++++++++ pg_deltax/query | 28 +++++ pg_deltax/results/20260519/c6a.4xlarge.json | 59 +++++++++++ pg_deltax/start | 5 + pg_deltax/stop | 4 + pg_deltax/template.json | 12 +++ 12 files changed, 396 insertions(+) create mode 100755 pg_deltax/benchmark.sh create mode 100755 pg_deltax/check create mode 100644 pg_deltax/create.sql create mode 100755 pg_deltax/data-size create mode 100755 pg_deltax/install create mode 100755 pg_deltax/load create mode 100644 pg_deltax/queries.sql create mode 100755 pg_deltax/query create mode 100644 pg_deltax/results/20260519/c6a.4xlarge.json create mode 100755 pg_deltax/start create mode 100755 pg_deltax/stop create mode 100644 pg_deltax/template.json diff --git a/pg_deltax/benchmark.sh b/pg_deltax/benchmark.sh new file mode 100755 index 0000000000..9a2f80d2e1 --- /dev/null +++ b/pg_deltax/benchmark.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Thin shim — actual flow is in lib/benchmark-common.sh. +export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-partitioned" +export BENCH_DURABLE=yes +exec ../lib/benchmark-common.sh diff --git a/pg_deltax/check b/pg_deltax/check new file mode 100755 index 0000000000..5c6f711234 --- /dev/null +++ b/pg_deltax/check @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +sudo -u postgres psql -t -c 'SELECT 1' >/dev/null diff --git a/pg_deltax/create.sql b/pg_deltax/create.sql new file mode 100644 index 0000000000..41c961c00f --- /dev/null +++ b/pg_deltax/create.sql @@ -0,0 +1,108 @@ +CREATE TABLE hits +( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT NOT NULL, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT NOT NULL, + Referer TEXT NOT NULL, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT NOT NULL, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel TEXT NOT NULL, + Params TEXT NOT NULL, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT NOT NULL, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset TEXT NOT NULL, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT NOT NULL, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT NOT NULL, + BrowserCountry TEXT NOT NULL, + SocialNetwork TEXT NOT NULL, + SocialAction TEXT NOT NULL, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT NOT NULL, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT NOT NULL, + ParamCurrency TEXT NOT NULL, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT NOT NULL, + OpenstatCampaignID TEXT NOT NULL, + OpenstatAdID TEXT NOT NULL, + OpenstatSourceID TEXT NOT NULL, + UTMSource TEXT NOT NULL, + UTMMedium TEXT NOT NULL, + UTMCampaign TEXT NOT NULL, + UTMContent TEXT NOT NULL, + UTMTerm TEXT NOT NULL, + FromTag TEXT NOT NULL, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL +); diff --git a/pg_deltax/data-size b/pg_deltax/data-size new file mode 100755 index 0000000000..00a3184587 --- /dev/null +++ b/pg_deltax/data-size @@ -0,0 +1,8 @@ +#!/bin/bash +# Report the test database's on-disk size: tables + indexes + TOAST. Excludes +# pg_wal (durability metadata that grows with activity rather than dataset +# size) and cluster-wide files (pg_global, pg_xact). Same convention as +# pgpro_tam and postgresql-orioledb. +set -eu + +sudo -u postgres psql -t -A -c "SELECT pg_database_size('test')" diff --git a/pg_deltax/install b/pg_deltax/install new file mode 100755 index 0000000000..c222053876 --- /dev/null +++ b/pg_deltax/install @@ -0,0 +1,74 @@ +#!/bin/bash +# Install PostgreSQL 18, Rust + cargo-pgrx, build and install the pg_deltax +# extension from source, and wire it into shared_preload_libraries. Idempotent. +set -eu + +PGVERSION=${PGVERSION:-18} +PG_DELTAX_REPO=${PG_DELTAX_REPO:-https://github.com/xataio/pg_deltax.git} +# Empty by default: don't touch git state. Set to a tag/branch/sha to force +# a checkout. Avoids clobbering local iteration when ~/pg_deltax is either a +# git checkout on a feature branch or a non-git rsync from `make deploy`. +PG_DELTAX_REF=${PG_DELTAX_REF:-} +PG_CONFIG=/usr/lib/postgresql/$PGVERSION/bin/pg_config + +# PostgreSQL 18 +export DEBIAN_FRONTEND=noninteractive +sudo apt-get update -y +sudo apt-get install -y gnupg postgresql-common apt-transport-https lsb-release wget git +sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y +sudo apt-get update -y +sudo apt-get install -y postgresql-$PGVERSION postgresql-client-$PGVERSION postgresql-server-dev-$PGVERSION + +# Tune PostgreSQL the same way the vanilla postgresql/ benchmark does. +memory=$(awk '/MemTotal/ {print $2}' /proc/meminfo) +threads=$(nproc) +cpus=$((threads / 2)) +shared_buffers=$((memory / 4)) +effective_cache_size=$((memory - memory / 4)) +max_worker_processes=$((threads + 15)) + +sudo tee /etc/postgresql/$PGVERSION/main/conf.d/clickbench.conf <> /etc/postgresql/$PGVERSION/main/postgresql.conf" +fi + +# Rust toolchain (idempotent: rustup is a no-op if already installed). +if ! command -v cargo >/dev/null 2>&1; then + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable +fi +# shellcheck disable=SC1091 +source "$HOME/.cargo/env" + +sudo apt-get install -y pkg-config libssl-dev libclang-dev clang + +cargo install --locked cargo-pgrx --version 0.17.0 + +# Build and install pg_deltax against this PG. +# - Fresh box: clone the public repo. +# - Existing git checkout: leave it alone unless PG_DELTAX_REF is set. +# - Existing non-git dir (e.g. rsync'd by `make deploy`): just build whatever +# is there. Don't try to `git fetch` — it would fail. +if [ ! -d "$HOME/pg_deltax" ]; then + git clone "$PG_DELTAX_REPO" "$HOME/pg_deltax" +fi +cd "$HOME/pg_deltax" +if [ -n "$PG_DELTAX_REF" ] && [ -d .git ]; then + git fetch --tags origin + git checkout "$PG_DELTAX_REF" +fi +cargo pgrx init --pg$PGVERSION "$PG_CONFIG" +sudo env "PATH=$PATH" "RUSTUP_HOME=${RUSTUP_HOME:-$HOME/.rustup}" "CARGO_HOME=${CARGO_HOME:-$HOME/.cargo}" "PGRX_HOME=$HOME/.pgrx" \ + cargo pgrx install --pg-config "$PG_CONFIG" --release + +sudo systemctl restart postgresql@$PGVERSION-main diff --git a/pg_deltax/load b/pg_deltax/load new file mode 100755 index 0000000000..0dae2000ca --- /dev/null +++ b/pg_deltax/load @@ -0,0 +1,46 @@ +#!/bin/bash +# Drop+create the test DB, install pg_deltax, set up the partitioned hits table, +# enable compression, and direct-backfill load the 100 hits_*.parquet files in +# a single COPY ... WITH (FORMAT deltax_compress). +set -eu + +PARQUET_DIR=/tmp/hits_parquet + +# Move parquet files into a path the postgres server user can read. +sudo rm -rf "$PARQUET_DIR" +sudo mkdir -p "$PARQUET_DIR" +sudo mv hits_*.parquet "$PARQUET_DIR/" +sudo chmod 644 "$PARQUET_DIR"/*.parquet + +# Recreate the DB so this script is idempotent. DROP wipes any prior +# ALTER DATABASE settings so we start from postgresql.conf defaults +# (work_mem=64MB, set by ./install in conf.d/clickbench.conf). +sudo -u postgres psql -v ON_ERROR_STOP=1 -t -c "DROP DATABASE IF EXISTS test" +sudo -u postgres psql -v ON_ERROR_STOP=1 -t -c "CREATE DATABASE test" + +# Bump work_mem for the load only. Direct backfill sorts each segment before +# compressing; 1GB keeps larger segments in memory and shaves load time. +# Reset before the query phase so the concurrent-QPS test (10 connections +# each spawning parallel-scan workers) doesn't multiply this up to OOM. +sudo -u postgres psql -v ON_ERROR_STOP=1 -t -c "ALTER DATABASE test SET work_mem TO '1GB'" + +sudo -u postgres psql -v ON_ERROR_STOP=1 -t test -c "CREATE EXTENSION pg_deltax" + +# Schema + partitioning + compression setup. mock_now pins the partition +# boundary calculation to the dataset's epoch (the hits data is from 2013). +sudo -u postgres psql -v ON_ERROR_STOP=1 -t test < create.sql +sudo -u postgres psql -v ON_ERROR_STOP=1 -t test -c "SET pg_deltax.mock_now = '2013-07-01 12:00:00'; SELECT deltax_create_table('hits', 'eventtime', '3 days'::interval, 15)" +sudo -u postgres psql -v ON_ERROR_STOP=1 -t test -c "SELECT deltax_enable_compression('hits', order_by => ARRAY['counterid', 'userid', 'eventtime'], segment_size => 30000)" + +# Direct backfill: load and compress in a single pass using FORMAT deltax_compress. +sudo -u postgres psql -v ON_ERROR_STOP=1 test -c "COPY hits FROM '$PARQUET_DIR/hits_*.parquet' WITH (FORMAT deltax_compress)" + +sudo -u postgres psql -v ON_ERROR_STOP=1 -t test -c "VACUUM FREEZE ANALYZE hits" + +# Restore default work_mem (64MB) and disable JIT for the query phase. +sudo -u postgres psql -v ON_ERROR_STOP=1 -t -c "ALTER DATABASE test RESET work_mem" +sudo -u postgres psql -v ON_ERROR_STOP=1 -t -c "ALTER DATABASE test SET jit TO off" + +# Free disk: the source parquet files are no longer needed. +sudo rm -rf "$PARQUET_DIR" +sync diff --git a/pg_deltax/queries.sql b/pg_deltax/queries.sql new file mode 100644 index 0000000000..31f65fc898 --- /dev/null +++ b/pg_deltax/queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; +SELECT AVG(UserID) FROM hits; +SELECT COUNT(DISTINCT UserID) FROM hits; +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; +SELECT MIN(EventDate), MAX(EventDate) FROM hits; +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/pg_deltax/query b/pg_deltax/query new file mode 100755 index 0000000000..a89c47a31b --- /dev/null +++ b/pg_deltax/query @@ -0,0 +1,28 @@ +#!/bin/bash +# Reads a SQL query from stdin, runs it via psql against the `test` DB. +# Stdout: query result. +# Stderr: query runtime in fractional seconds on the last line (parsed from +# psql's `\timing` output). +# Exit non-zero on error. +set -e + +query=$(cat) + +out=$(printf '\\timing\n%s\n' "$query" | sudo -u postgres psql test -t 2>&1) +status=$? + +if printf '%s\n' "$out" | grep -q '^ERROR\|psql: error'; then + printf '%s\n' "$out" >&2 + exit 1 +fi + +printf '%s\n' "$out" | grep -v '^Time:' + +time_ms=$(printf '%s\n' "$out" | grep -oP 'Time:\s+\K[0-9]+\.[0-9]+' | tail -n1) +if [ -z "$time_ms" ]; then + echo "no timing in psql output" >&2 + exit 1 +fi +awk -v ms="$time_ms" 'BEGIN { printf "%.3f\n", ms / 1000 }' >&2 + +exit "$status" diff --git a/pg_deltax/results/20260519/c6a.4xlarge.json b/pg_deltax/results/20260519/c6a.4xlarge.json new file mode 100644 index 0000000000..26e3751045 --- /dev/null +++ b/pg_deltax/results/20260519/c6a.4xlarge.json @@ -0,0 +1,59 @@ +{ + "system": "pg_deltax", + "date": "2026-05-19", + "machine": "c6a.4xlarge", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["Rust", "PostgreSQL compatible", "column-oriented", "time-series"], + "load_time": 369, + "data_size": 14588540607, + "concurrent_qps": 0.425, + "concurrent_error_ratio": 0.899, + "result": [ + [0.091, 0.003, 0.004], + [0.793, 0.229, 0.221], + [0.245, 0.03, 0.03], + [0.212, 0.029, 0.027], + [3.032, 0.547, 0.545], + [4.466, 0.445, 0.448], + [0.197, 0.017, 0.017], + [0.599, 0.097, 0.095], + [5.175, 0.802, 0.791], + [6.9, 1.01, 1.009], + [4.052, 0.212, 0.215], + [4.676, 0.302, 0.296], + [5.151, 1.144, 1.124], + [10.087, 2.373, 2.438], + [6.358, 1.268, 1.292], + [4.156, 2.052, 2.122], + [10.862, 2.508, 2.485], + [9.01, 0.678, 0.64], + [21.416, 5.003, 4.904], + [1.055, 0.037, 0.036], + [22.183, 4.58, 4.599], + [24.346, 1.345, 1.355], + [43.613, 3.2, 3.032], + [2.441, 0.407, 0.394], + [0.562, 0.103, 0.104], + [7.57, 1.413, 1.421], + [0.56, 0.103, 0.105], + [2.292, 0.527, 0.524], + [21.186, 8.449, 8.087], + [0.209, 0.027, 0.027], + [7.387, 1.068, 1.053], + [16.958, 1.623, 1.65], + [21.036, 9.753, 9.784], + [21.314, 3.49, 3.576], + [21.295, 3.524, 3.45], + [3.296, 1.748, 1.727], + [0.664, 0.082, 0.083], + [0.485, 0.045, 0.046], + [0.652, 0.066, 0.062], + [0.978, 0.19, 0.184], + [0.71, 0.072, 0.08], + [0.619, 0.047, 0.048], + [0.501, 0.039, 0.039] + ] +} diff --git a/pg_deltax/start b/pg_deltax/start new file mode 100755 index 0000000000..6c3e837a95 --- /dev/null +++ b/pg_deltax/start @@ -0,0 +1,5 @@ +#!/bin/bash +set -eu + +PGVERSION=${PGVERSION:-18} +sudo systemctl start postgresql@$PGVERSION-main diff --git a/pg_deltax/stop b/pg_deltax/stop new file mode 100755 index 0000000000..e124557497 --- /dev/null +++ b/pg_deltax/stop @@ -0,0 +1,4 @@ +#!/bin/bash + +PGVERSION=${PGVERSION:-18} +sudo systemctl stop postgresql@$PGVERSION-main || true diff --git a/pg_deltax/template.json b/pg_deltax/template.json new file mode 100644 index 0000000000..a581b2642f --- /dev/null +++ b/pg_deltax/template.json @@ -0,0 +1,12 @@ +{ + "system": "pg_deltax", + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": [ + "Rust", + "PostgreSQL compatible", + "column-oriented", + "time-series" + ] +} From bb44330345ab9b3f1f126d178ff721da59f64a88 Mon Sep 17 00:00:00 2001 From: Tudor Golubenco Date: Tue, 19 May 2026 11:38:58 +0200 Subject: [PATCH 2/3] test re-run + stopping condition --- pg_deltax/results/20260519/c6a.4xlarge.json | 94 ++++++++++----------- pg_deltax/stop | 7 +- 2 files changed, 53 insertions(+), 48 deletions(-) diff --git a/pg_deltax/results/20260519/c6a.4xlarge.json b/pg_deltax/results/20260519/c6a.4xlarge.json index 26e3751045..15b1581c8d 100644 --- a/pg_deltax/results/20260519/c6a.4xlarge.json +++ b/pg_deltax/results/20260519/c6a.4xlarge.json @@ -7,53 +7,53 @@ "hardware": "cpu", "tuned": "no", "tags": ["Rust", "PostgreSQL compatible", "column-oriented", "time-series"], - "load_time": 369, - "data_size": 14588540607, - "concurrent_qps": 0.425, - "concurrent_error_ratio": 0.899, + "load_time": 362, + "data_size": 14590629567, + "concurrent_qps": 0.078, + "concurrent_error_ratio": 0.161, "result": [ - [0.091, 0.003, 0.004], - [0.793, 0.229, 0.221], - [0.245, 0.03, 0.03], - [0.212, 0.029, 0.027], - [3.032, 0.547, 0.545], - [4.466, 0.445, 0.448], - [0.197, 0.017, 0.017], - [0.599, 0.097, 0.095], - [5.175, 0.802, 0.791], - [6.9, 1.01, 1.009], - [4.052, 0.212, 0.215], - [4.676, 0.302, 0.296], - [5.151, 1.144, 1.124], - [10.087, 2.373, 2.438], - [6.358, 1.268, 1.292], - [4.156, 2.052, 2.122], - [10.862, 2.508, 2.485], - [9.01, 0.678, 0.64], - [21.416, 5.003, 4.904], - [1.055, 0.037, 0.036], - [22.183, 4.58, 4.599], - [24.346, 1.345, 1.355], - [43.613, 3.2, 3.032], - [2.441, 0.407, 0.394], - [0.562, 0.103, 0.104], - [7.57, 1.413, 1.421], - [0.56, 0.103, 0.105], - [2.292, 0.527, 0.524], - [21.186, 8.449, 8.087], - [0.209, 0.027, 0.027], - [7.387, 1.068, 1.053], - [16.958, 1.623, 1.65], - [21.036, 9.753, 9.784], - [21.314, 3.49, 3.576], - [21.295, 3.524, 3.45], - [3.296, 1.748, 1.727], - [0.664, 0.082, 0.083], - [0.485, 0.045, 0.046], - [0.652, 0.066, 0.062], - [0.978, 0.19, 0.184], - [0.71, 0.072, 0.08], - [0.619, 0.047, 0.048], - [0.501, 0.039, 0.039] + [0.088, 0.004, 0.004], + [0.787, 0.236, 0.233], + [0.248, 0.03, 0.03], + [0.208, 0.029, 0.028], + [3.052, 0.561, 0.546], + [4.367, 0.423, 0.422], + [0.201, 0.017, 0.017], + [0.598, 0.097, 0.095], + [5.205, 0.8, 0.79], + [6.916, 1.026, 1.011], + [4.065, 0.229, 0.215], + [4.537, 0.284, 0.288], + [5.249, 1.135, 1.161], + [10.237, 2.386, 2.39], + [6.44, 1.271, 1.24], + [4.098, 2.018, 2.04], + [10.659, 2.467, 2.447], + [8.87, 0.646, 0.644], + [21.048, 4.764, 4.779], + [1.051, 0.035, 0.035], + [22.323, 4.571, 4.558], + [24.426, 1.312, 1.267], + [43.648, 3.096, 2.904], + [2.545, 0.428, 0.425], + [0.552, 0.106, 0.107], + [7.679, 1.433, 1.425], + [0.55, 0.106, 0.104], + [2.292, 0.541, 0.535], + [20.771, 8.415, 8.635], + [0.211, 0.026, 0.026], + [7.117, 1.071, 1.071], + [17.06, 1.65, 1.629], + [21.191, 9.77, 9.878], + [21.426, 3.607, 3.611], + [21.299, 3.586, 3.6], + [3.226, 1.788, 1.739], + [0.645, 0.09, 0.085], + [0.473, 0.048, 0.048], + [0.662, 0.067, 0.067], + [0.978, 0.196, 0.201], + [0.704, 0.07, 0.076], + [0.609, 0.047, 0.046], + [0.476, 0.037, 0.038] ] } diff --git a/pg_deltax/stop b/pg_deltax/stop index e124557497..5f9bd9b842 100755 --- a/pg_deltax/stop +++ b/pg_deltax/stop @@ -1,4 +1,9 @@ #!/bin/bash +# Bound the stop so we never wedge cloud-init: postgresql@.service has +# TimeoutStopSec=infinity, and the concurrent-QPS test can leave the +# postmaster waiting on backend children that were SIGKILLed by earlyoom. +# After 60s of orderly shutdown, escalate to SIGKILL via `systemctl kill`. PGVERSION=${PGVERSION:-18} -sudo systemctl stop postgresql@$PGVERSION-main || true +sudo timeout 60 systemctl stop postgresql@$PGVERSION-main || \ + sudo systemctl kill -s SIGKILL postgresql@$PGVERSION-main || true From 47ce93f21356c6f950dae804fb9f46d2bb21a64d Mon Sep 17 00:00:00 2001 From: Tudor Golubenco Date: Tue, 19 May 2026 15:05:45 +0200 Subject: [PATCH 3/3] pg_deltax/install: clone from canonical xataio/deltax URL The xataio/pg_deltax URL still works via GitHub's 301 redirect, but the canonical repo name is xataio/deltax (matches the link in the PR description). Use the canonical URL directly so the clone does not silently depend on the redirect. --- pg_deltax/install | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pg_deltax/install b/pg_deltax/install index c222053876..00f1b297b0 100755 --- a/pg_deltax/install +++ b/pg_deltax/install @@ -4,7 +4,7 @@ set -eu PGVERSION=${PGVERSION:-18} -PG_DELTAX_REPO=${PG_DELTAX_REPO:-https://github.com/xataio/pg_deltax.git} +PG_DELTAX_REPO=${PG_DELTAX_REPO:-https://github.com/xataio/deltax.git} # Empty by default: don't touch git state. Set to a tag/branch/sha to force # a checkout. Avoids clobbering local iteration when ~/pg_deltax is either a # git checkout on a feature branch or a non-git rsync from `make deploy`.