From 44dad38c3b82feefeedd38a734792e5c26da9486 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 May 2026 23:21:34 +0000 Subject: [PATCH] Add Greengage, WarehousePG, and OpenGPDB Three open-source Greenplum forks that picked up active development after Greenplum went closed source in 2024: - Greengage (https://github.com/GreengageDB/greengage) Installed from the upstream greengage6.deb (6.30.1) in ubuntu:22.04. - OpenGPDB (https://github.com/open-gpdb/gpdb) Installed from the upstream greenplum-db-6 .deb (6.29.8) in ubuntu:22.04. - WarehousePG (https://github.com/warehouse-pg/warehouse-pg) No prebuilt binaries; built from source (7.4.0-WHPG) in rockylinux:9. All three reuse the cloudberry layout: privileged docker container, single-host 14-segment cluster via gpinitsystem, gpfdist load, psql query. data-size runs du inside the container so it reports actual bytes (the cloudberry copy reads /data0 on the host, which isn't bind-mounted). Resolves #888 Co-Authored-By: Claude Opus 4.7 --- greengage/README.md | 17 ++++ greengage/benchmark.sh | 5 + greengage/check | 7 ++ greengage/create.sql | 116 ++++++++++++++++++++++++ greengage/data-size | 9 ++ greengage/gpinitsystem_config | 64 +++++++++++++ greengage/install | 129 ++++++++++++++++++++++++++ greengage/load | 30 ++++++ greengage/queries.sql | 43 +++++++++ greengage/query | 30 ++++++ greengage/start | 12 +++ greengage/stop | 8 ++ greengage/template.json | 12 +++ opengpdb/README.md | 17 ++++ opengpdb/benchmark.sh | 5 + opengpdb/check | 7 ++ opengpdb/create.sql | 116 ++++++++++++++++++++++++ opengpdb/data-size | 9 ++ opengpdb/gpinitsystem_config | 64 +++++++++++++ opengpdb/install | 127 ++++++++++++++++++++++++++ opengpdb/load | 26 ++++++ opengpdb/queries.sql | 43 +++++++++ opengpdb/query | 30 ++++++ opengpdb/start | 9 ++ opengpdb/stop | 6 ++ opengpdb/template.json | 12 +++ warehousepg/README.md | 18 ++++ warehousepg/benchmark.sh | 5 + warehousepg/check | 7 ++ warehousepg/create.sql | 116 
++++++++++++++++++++++++ warehousepg/data-size | 9 ++ warehousepg/gpinitsystem_config | 64 +++++++++++++ warehousepg/install | 156 ++++++++++++++++++++++++++++++++ warehousepg/load | 26 ++++++ warehousepg/queries.sql | 43 +++++++++ warehousepg/query | 30 ++++++ warehousepg/start | 9 ++ warehousepg/stop | 6 ++ warehousepg/template.json | 12 +++ 39 files changed, 1454 insertions(+) create mode 100644 greengage/README.md create mode 100755 greengage/benchmark.sh create mode 100755 greengage/check create mode 100644 greengage/create.sql create mode 100755 greengage/data-size create mode 100644 greengage/gpinitsystem_config create mode 100755 greengage/install create mode 100755 greengage/load create mode 100644 greengage/queries.sql create mode 100755 greengage/query create mode 100755 greengage/start create mode 100755 greengage/stop create mode 100644 greengage/template.json create mode 100644 opengpdb/README.md create mode 100755 opengpdb/benchmark.sh create mode 100755 opengpdb/check create mode 100644 opengpdb/create.sql create mode 100755 opengpdb/data-size create mode 100644 opengpdb/gpinitsystem_config create mode 100755 opengpdb/install create mode 100755 opengpdb/load create mode 100644 opengpdb/queries.sql create mode 100755 opengpdb/query create mode 100755 opengpdb/start create mode 100755 opengpdb/stop create mode 100644 opengpdb/template.json create mode 100644 warehousepg/README.md create mode 100755 warehousepg/benchmark.sh create mode 100755 warehousepg/check create mode 100644 warehousepg/create.sql create mode 100755 warehousepg/data-size create mode 100644 warehousepg/gpinitsystem_config create mode 100755 warehousepg/install create mode 100755 warehousepg/load create mode 100644 warehousepg/queries.sql create mode 100755 warehousepg/query create mode 100755 warehousepg/start create mode 100755 warehousepg/stop create mode 100644 warehousepg/template.json diff --git a/greengage/README.md b/greengage/README.md new file mode 100644 index 
0000000000..7817f501e7 --- /dev/null +++ b/greengage/README.md @@ -0,0 +1,17 @@ +Greengage DB is an open-source MPP database based on Greenplum, +maintained by the GreengageDB community after Greenplum went closed +source in 2024. https://github.com/GreengageDB/greengage + +The benchmark runs Greengage inside a privileged Ubuntu 22.04 docker +container, so the host can be any distro with docker. The install +script fetches the upstream `greengage6.deb` (no source build) and +initializes a single-host, 14-segment cluster via gpinitsystem. + +The other scripts (start/stop/check/load/query) `docker exec` into the +running container. + +To run the test: + +``` +./benchmark.sh +``` diff --git a/greengage/benchmark.sh b/greengage/benchmark.sh new file mode 100755 index 0000000000..432bd3ae17 --- /dev/null +++ b/greengage/benchmark.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Thin shim — actual flow is in lib/benchmark-common.sh. +export BENCH_DOWNLOAD_SCRIPT="download-hits-tsv" +export BENCH_DURABLE=yes +exec ../lib/benchmark-common.sh diff --git a/greengage/check b/greengage/check new file mode 100755 index 0000000000..4deaeee469 --- /dev/null +++ b/greengage/check @@ -0,0 +1,7 @@ +#!/bin/bash +set -e + +NAME=clickbench-greengage + +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \ + 'psql -d postgres -t -c "SELECT 1"' >/dev/null diff --git a/greengage/create.sql b/greengage/create.sql new file mode 100644 index 0000000000..a2ae8a7eea --- /dev/null +++ b/greengage/create.sql @@ -0,0 +1,116 @@ +drop table if exists hits; +CREATE TABLE hits +( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT NOT NULL, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT NOT NULL, + Referer TEXT NOT NULL, + IsRefresh SMALLINT 
NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT NOT NULL, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel TEXT NOT NULL, + Params TEXT NOT NULL, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT NOT NULL, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset TEXT NOT NULL, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT NOT NULL, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT NOT NULL, + BrowserCountry TEXT NOT NULL, + 
SocialNetwork TEXT NOT NULL, + SocialAction TEXT NOT NULL, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT NOT NULL, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT NOT NULL, + ParamCurrency TEXT NOT NULL, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT NOT NULL, + OpenstatCampaignID TEXT NOT NULL, + OpenstatAdID TEXT NOT NULL, + OpenstatSourceID TEXT NOT NULL, + UTMSource TEXT NOT NULL, + UTMMedium TEXT NOT NULL, + UTMCampaign TEXT NOT NULL, + UTMContent TEXT NOT NULL, + UTMTerm TEXT NOT NULL, + FromTag TEXT NOT NULL, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL +) +with (appendoptimized=true,orientation=column,compresstype=zstd) +DISTRIBUTED BY (UserID); +CREATE INDEX hits_idx on hits using btree (CounterID, EventDate, UserID, EventTime, WatchID); +drop external table if exists hits_ext; +CREATE EXTERNAL TABLE hits_ext (like hits) +LOCATION ('gpfdist://localhost:8080/hits.tsv') +FORMAT 'TEXT'; diff --git a/greengage/data-size b/greengage/data-size new file mode 100755 index 0000000000..74b16526ce --- /dev/null +++ b/greengage/data-size @@ -0,0 +1,9 @@ +#!/bin/bash +# The cluster's data lives inside the container under /data0, not on the +# host. Run du inside the container so we see the actual bytes. 
+set -eu + +NAME=clickbench-greengage + +sudo docker exec "$NAME" du -bcs /data0 2>/dev/null | \ + awk '/total$/ { print $1 }' diff --git a/greengage/gpinitsystem_config b/greengage/gpinitsystem_config new file mode 100644 index 0000000000..0c5a4701d7 --- /dev/null +++ b/greengage/gpinitsystem_config @@ -0,0 +1,64 @@ +# FILE NAME: gpinitsystem_config + +# Configuration file needed by the gpinitsystem + +################################################ +#### REQUIRED PARAMETERS +################################################ + +#### Naming convention for utility-generated data directories. +SEG_PREFIX=gpseg + +#### Base number by which primary segment port numbers +#### are calculated. +PORT_BASE=6000 + +#### File system location(s) where primary segment data directories +#### will be created. The number of locations in the list dictate +#### the number of primary segments that will get created per +#### physical host (if multiple addresses for a host are listed in +#### the hostfile, the number of segments will be spread evenly across +#### the specified interface addresses). +declare -a DATA_DIRECTORY=(/data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary) + +#### OS-configured hostname or IP address of the coordinator host. +COORDINATOR_HOSTNAME=localhost + +#### File system location where the coordinator data directory +#### will be created. +COORDINATOR_DIRECTORY=/data0/coordinator + +#### Port number for the coordinator instance. +COORDINATOR_PORT=5432 + +#### Shell utility used to connect to remote hosts. +TRUSTED_SHELL=ssh + +#### Default server-side character set encoding. +ENCODING=UNICODE + +################################################ +#### OPTIONAL MIRROR PARAMETERS +################################################ + +#### Base number by which mirror segment port numbers +#### are calculated. 
+#MIRROR_PORT_BASE=7000 + +#### File system location(s) where mirror segment data directories +#### will be created. The number of mirror locations must equal the +#### number of primary locations as specified in the +#### DATA_DIRECTORY parameter. +#declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror /data1/mirror /data2/mirror /data2/mirror /data2/mirror) + + +################################################ +#### OTHER OPTIONAL PARAMETERS +################################################ + +#### Create a database of this name after initialization. +#DATABASE_NAME=name_of_database + +#### Specify the location of the host address file here instead of +#### with the -h option of gpinitsystem. +#MACHINE_LIST_FILE=/home/gpadmin/gpconfigs/hostfile_gpinitsystem diff --git a/greengage/install b/greengage/install new file mode 100755 index 0000000000..edbb662b6b --- /dev/null +++ b/greengage/install @@ -0,0 +1,129 @@ +#!/bin/bash +# Greengage's prebuilt .deb (greengage6.deb) targets Ubuntu 22.04 (libssl3, +# libperl5.34, etc.) and the cluster init requires kernel tuning, sshd, and +# the gpadmin user — none of which we want to impose on the benchmark host. +# Run everything inside a privileged ubuntu:22.04 container instead; the +# other scripts (start/stop/check/load/query) docker-exec into it. +# +# Idempotent: re-running this script after a successful install does nothing. +set -eu + +NAME=clickbench-greengage +IMAGE=ubuntu:22.04 +GG_VERSION=6.30.1 +DEB_URL="https://github.com/GreengageDB/greengage/releases/download/${GG_VERSION}/greengage6.deb" + +# 1. Make sure docker is installed and the daemon is up. +if ! 
command -v docker >/dev/null 2>&1; then + if command -v apt-get >/dev/null 2>&1; then + sudo apt-get update -y + sudo apt-get install -y docker.io + elif command -v yum >/dev/null 2>&1; then + sudo yum install -y docker + else + echo "greengage/install: install docker manually first" >&2 + exit 1 + fi +fi +sudo systemctl start docker 2>/dev/null || sudo service docker start || true + +# 2. Skip if the container is already created and the cluster is initialized. +if sudo docker container inspect "$NAME" >/dev/null 2>&1; then + if sudo docker exec "$NAME" test -d /data0/coordinator/gpseg-1 2>/dev/null; then + echo "greengage: container '$NAME' already initialized; nothing to do" >&2 + exit 0 + fi +fi + +sudo docker pull "$IMAGE" + +# 3. Long-lived privileged container. Bridge mode + --hostname localhost so +# gpinitsystem's ssh-to-localhost lands on the container's own sshd. Publish +# 5432 so clients on the host can also reach postgres if desired. +if ! sudo docker container inspect "$NAME" >/dev/null 2>&1; then + sudo docker run -d --privileged --hostname localhost -p 5432:5432 \ + --name "$NAME" "$IMAGE" sleep infinity +fi + +# 4. Inside the container: install the .deb, kernel tuning, gpadmin user, +# sshd. The .deb itself ships no postinst, so the gpadmin user has to be +# created by us. 
+sudo docker exec -i "$NAME" bash -s </dev/null +sysctl -w kernel.shmmax=\$SHMAX >/dev/null +sysctl -w kernel.shmmni=4096 >/dev/null +sysctl -w vm.overcommit_memory=2 >/dev/null +sysctl -w vm.overcommit_ratio=95 >/dev/null +sysctl -w net.ipv4.ip_local_port_range="10000 65535" >/dev/null +sysctl -w kernel.sem="250 2048000 200 8192" >/dev/null + +cat > /etc/security/limits.d/99-gpadmin.conf </dev/null 2>&1 || useradd gpadmin -m -g gpadmin -s /bin/bash +echo "gpadmin ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/gpadmin +sudo -u gpadmin bash -c ' + set -e + mkdir -p ~/.ssh + chmod 700 ~/.ssh + [ -f ~/.ssh/id_rsa ] || ssh-keygen -t rsa -b 4096 -N "" -f ~/.ssh/id_rsa + cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys + chmod 600 ~/.ssh/authorized_keys +' + +# /bin/ping in this image has no caps, so even root can't open a raw socket. +# gpinitsystem pings localhost to verify reachability. +setcap cap_net_raw+ep /bin/ping +[ -e /bin/ping6 ] && setcap cap_net_raw+ep /bin/ping6 || true + +# sshd setup. +ssh-keygen -A +mkdir -p /run/sshd +echo "PasswordAuthentication no" >> /etc/ssh/sshd_config +echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config +/usr/sbin/sshd + +# Data dirs and gpadmin shell env. +mkdir -p /data0/primary /data0/coordinator +chown -R gpadmin:gpadmin /data0 +grep -q greengage_path /home/gpadmin/.bashrc || \ + echo "source /opt/greengagedb/greengage/greengage_path.sh" >> /home/gpadmin/.bashrc +grep -q COORDINATOR_DATA_DIRECTORY /home/gpadmin/.bashrc || \ + echo "export COORDINATOR_DATA_DIRECTORY=/data0/coordinator/gpseg-1" >> /home/gpadmin/.bashrc + +echo localhost > /home/gpadmin/hosts +chown gpadmin:gpadmin /home/gpadmin/hosts +EOF + +# 5. Drop gpinitsystem_config in and run gpinitsystem. 
+sudo docker cp "$(dirname "$0")/gpinitsystem_config" \ + "$NAME":/home/gpadmin/gpinitsystem_config +sudo docker exec "$NAME" chown gpadmin:gpadmin /home/gpadmin/gpinitsystem_config + +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \ + 'gpinitsystem -a -c gpinitsystem_config -h hosts' diff --git a/greengage/load b/greengage/load new file mode 100755 index 0000000000..20ff71abd3 --- /dev/null +++ b/greengage/load @@ -0,0 +1,30 @@ +#!/bin/bash +set -eu + +NAME=clickbench-greengage + +# Stream hits.tsv and create.sql into the container via tar so that any +# symlinks on the host (e.g. hits.tsv -> /elsewhere) are dereferenced. +# `tar -h` follows symlinks; piping through `docker exec -i ... tar` +# avoids `docker cp`'s symlink-passes-through behavior. +tar -ch hits.tsv create.sql | \ + sudo docker exec -i "$NAME" tar -xC /home/gpadmin +sudo docker exec "$NAME" chown gpadmin:gpadmin \ + /home/gpadmin/hits.tsv /home/gpadmin/create.sql + +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \ + 'psql -d postgres -v ON_ERROR_STOP=1 -f /home/gpadmin/create.sql' + +# gpfdist serves hits.tsv to the gpfdist:// foreign table referenced by +# hits_ext. Spawn it once; subsequent loads reuse the running daemon. 
+sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \ + 'pgrep -u gpadmin gpfdist || nohup gpfdist -d /home/gpadmin >/tmp/gpfdist.log 2>&1 &' + +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \ + 'psql -d postgres -v ON_ERROR_STOP=1 -t -c "INSERT INTO hits SELECT * FROM hits_ext;"' +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \ + 'psql -d postgres -v ON_ERROR_STOP=1 -t -c "ANALYZE hits;"' + +sudo docker exec "$NAME" rm -f /home/gpadmin/hits.tsv +rm -f hits.tsv +sync diff --git a/greengage/queries.sql b/greengage/queries.sql new file mode 100644 index 0000000000..31f65fc898 --- /dev/null +++ b/greengage/queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; +SELECT AVG(UserID) FROM hits; +SELECT COUNT(DISTINCT UserID) FROM hits; +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; +SELECT MIN(EventDate), MAX(EventDate) FROM hits; +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY 
SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), 
SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), 
SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, 
Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/greengage/query b/greengage/query new file mode 100755 index 0000000000..24149ac4da --- /dev/null +++ b/greengage/query @@ -0,0 +1,30 @@ +#!/bin/bash +# Reads a SQL query from stdin, runs it via psql against the postgres DB +# inside the clickbench-greengage container as gpadmin. +# Stdout: query result. +# Stderr: query runtime in fractional seconds on the last line. +# Exit non-zero on error. +set -e + +NAME=clickbench-greengage +query=$(cat) + +out=$(printf '\\timing\n%s\n' "$query" | \ + sudo docker exec -i "$NAME" sudo -iu gpadmin bash -lc 'psql -d postgres -t' 2>&1) +status=$? 
+ +if printf '%s\n' "$out" | grep -q '^ERROR\|psql: error'; then + printf '%s\n' "$out" >&2 + exit 1 +fi + +printf '%s\n' "$out" | grep -v '^Time:' + +time_ms=$(printf '%s\n' "$out" | grep -oP 'Time:\s+\K[0-9]+\.[0-9]+' | tail -n1) +if [ -z "$time_ms" ]; then + echo "no timing in psql output" >&2 + exit 1 +fi +awk -v ms="$time_ms" 'BEGIN { printf "%.3f\n", ms / 1000 }' >&2 + +exit "$status" diff --git a/greengage/start b/greengage/start new file mode 100755 index 0000000000..d3a7bc56cf --- /dev/null +++ b/greengage/start @@ -0,0 +1,12 @@ +#!/bin/bash +set -eu + +NAME=clickbench-greengage + +# Bring the docker container up if it isn't already. +sudo docker start "$NAME" >/dev/null + +# sshd doesn't auto-start in the container; gpstart needs ssh to localhost. +# Then bring the cluster up. gpstart is a no-op if it's already running. +sudo docker exec "$NAME" bash -c 'pgrep sshd >/dev/null || /usr/sbin/sshd' +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc 'gpstart -a' || true diff --git a/greengage/stop b/greengage/stop new file mode 100755 index 0000000000..e53f7e3f62 --- /dev/null +++ b/greengage/stop @@ -0,0 +1,8 @@ +#!/bin/bash + +NAME=clickbench-greengage + +# Bring the cluster down inside the container, then pause the container. +# Both steps tolerate "already stopped". 
+sudo docker exec "$NAME" sudo -iu gpadmin bash -lc 'gpstop -a' 2>/dev/null || true +sudo docker stop "$NAME" >/dev/null 2>&1 || true diff --git a/greengage/template.json b/greengage/template.json new file mode 100644 index 0000000000..17e83676c1 --- /dev/null +++ b/greengage/template.json @@ -0,0 +1,12 @@ +{ + "system": "Greengage", + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": [ + "C", + "column-oriented", + "PostgreSQL compatible", + "lukewarm-cold-run" + ] +} diff --git a/opengpdb/README.md b/opengpdb/README.md new file mode 100644 index 0000000000..3913f7fc2c --- /dev/null +++ b/opengpdb/README.md @@ -0,0 +1,17 @@ +Open-GPDB is an open-source fork of Greenplum 6, maintained by the +Yandex Managed Greenplum team after upstream Greenplum went closed +source in 2024. https://github.com/open-gpdb/gpdb + +The benchmark runs Open-GPDB inside a privileged Ubuntu 22.04 docker +container, so the host can be any distro with docker. The install +script fetches the upstream `.deb` (no source build) and initializes a +single-host, 14-segment cluster via gpinitsystem. + +The other scripts (start/stop/check/load/query) `docker exec` into the +running container. + +To run the test: + +``` +./benchmark.sh +``` diff --git a/opengpdb/benchmark.sh b/opengpdb/benchmark.sh new file mode 100755 index 0000000000..432bd3ae17 --- /dev/null +++ b/opengpdb/benchmark.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Thin shim — actual flow is in lib/benchmark-common.sh. 
+export BENCH_DOWNLOAD_SCRIPT="download-hits-tsv" +export BENCH_DURABLE=yes +exec ../lib/benchmark-common.sh diff --git a/opengpdb/check b/opengpdb/check new file mode 100755 index 0000000000..9bc684e6a2 --- /dev/null +++ b/opengpdb/check @@ -0,0 +1,7 @@ +#!/bin/bash +set -e + +NAME=clickbench-opengpdb + +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \ + 'psql -d postgres -t -c "SELECT 1"' >/dev/null diff --git a/opengpdb/create.sql b/opengpdb/create.sql new file mode 100644 index 0000000000..a2ae8a7eea --- /dev/null +++ b/opengpdb/create.sql @@ -0,0 +1,116 @@ +drop table if exists hits; +CREATE TABLE hits +( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT NOT NULL, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT NOT NULL, + Referer TEXT NOT NULL, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT NOT NULL, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel TEXT NOT NULL, + Params TEXT NOT NULL, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT NOT NULL, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + 
WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset TEXT NOT NULL, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT NOT NULL, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT NOT NULL, + BrowserCountry TEXT NOT NULL, + SocialNetwork TEXT NOT NULL, + SocialAction TEXT NOT NULL, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT NOT NULL, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT NOT NULL, + ParamCurrency TEXT NOT NULL, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT NOT NULL, + OpenstatCampaignID TEXT NOT NULL, + OpenstatAdID TEXT NOT NULL, + OpenstatSourceID TEXT NOT NULL, + UTMSource TEXT NOT NULL, + UTMMedium TEXT NOT NULL, + UTMCampaign TEXT NOT NULL, + UTMContent TEXT NOT NULL, + UTMTerm TEXT NOT NULL, + FromTag TEXT NOT NULL, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL +) +with 
(appendoptimized=true,orientation=column,compresstype=zstd) +DISTRIBUTED BY (UserID); +CREATE INDEX hits_idx on hits using btree (CounterID, EventDate, UserID, EventTime, WatchID); +drop external table if exists hits_ext; +CREATE EXTERNAL TABLE hits_ext (like hits) +LOCATION ('gpfdist://localhost:8080/hits.tsv') +FORMAT 'TEXT'; diff --git a/opengpdb/data-size b/opengpdb/data-size new file mode 100755 index 0000000000..1bd38f3d2d --- /dev/null +++ b/opengpdb/data-size @@ -0,0 +1,9 @@ +#!/bin/bash +# The cluster's data lives inside the container under /data0, not on the +# host. Run du inside the container so we see the actual bytes. +set -eu + +NAME=clickbench-opengpdb + +sudo docker exec "$NAME" du -bcs /data0 2>/dev/null | \ + awk '/total$/ { print $1 }' diff --git a/opengpdb/gpinitsystem_config b/opengpdb/gpinitsystem_config new file mode 100644 index 0000000000..0c5a4701d7 --- /dev/null +++ b/opengpdb/gpinitsystem_config @@ -0,0 +1,64 @@ +# FILE NAME: gpinitsystem_config + +# Configuration file needed by the gpinitsystem + +################################################ +#### REQUIRED PARAMETERS +################################################ + +#### Naming convention for utility-generated data directories. +SEG_PREFIX=gpseg + +#### Base number by which primary segment port numbers +#### are calculated. +PORT_BASE=6000 + +#### File system location(s) where primary segment data directories +#### will be created. The number of locations in the list dictate +#### the number of primary segments that will get created per +#### physical host (if multiple addresses for a host are listed in +#### the hostfile, the number of segments will be spread evenly across +#### the specified interface addresses). 
+declare -a DATA_DIRECTORY=(/data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary) + +#### OS-configured hostname or IP address of the coordinator host. +COORDINATOR_HOSTNAME=localhost + +#### File system location where the coordinator data directory +#### will be created. +COORDINATOR_DIRECTORY=/data0/coordinator + +#### Port number for the coordinator instance. +COORDINATOR_PORT=5432 + +#### Shell utility used to connect to remote hosts. +TRUSTED_SHELL=ssh + +#### Default server-side character set encoding. +ENCODING=UNICODE + +################################################ +#### OPTIONAL MIRROR PARAMETERS +################################################ + +#### Base number by which mirror segment port numbers +#### are calculated. +#MIRROR_PORT_BASE=7000 + +#### File system location(s) where mirror segment data directories +#### will be created. The number of mirror locations must equal the +#### number of primary locations as specified in the +#### DATA_DIRECTORY parameter. +#declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror /data1/mirror /data2/mirror /data2/mirror /data2/mirror) + + +################################################ +#### OTHER OPTIONAL PARAMETERS +################################################ + +#### Create a database of this name after initialization. +#DATABASE_NAME=name_of_database + +#### Specify the location of the host address file here instead of +#### with the -h option of gpinitsystem. +#MACHINE_LIST_FILE=/home/gpadmin/gpconfigs/hostfile_gpinitsystem diff --git a/opengpdb/install b/opengpdb/install new file mode 100755 index 0000000000..b42e424b77 --- /dev/null +++ b/opengpdb/install @@ -0,0 +1,127 @@ +#!/bin/bash +# Open-GPDB ships an Ubuntu 22.04 .deb (greenplum-db-6__amd64.deb) but +# cluster init still requires kernel tuning, sshd, and the gpadmin user. 
+# Run everything inside a privileged ubuntu:22.04 container instead; the +# other scripts (start/stop/check/load/query) docker-exec into it. +# +# Idempotent: re-running this script after a successful install does nothing. +set -eu + +NAME=clickbench-opengpdb +IMAGE=ubuntu:22.04 +OG_VERSION=6.29.8 +DEB_FILE="greenplum-db-6_${OG_VERSION}-1-yandex.54453.5ad8245cbc_amd64.deb" +DEB_URL="https://github.com/open-gpdb/gpdb/releases/download/${OG_VERSION}/${DEB_FILE}" + +# 1. Make sure docker is installed and the daemon is up. +if ! command -v docker >/dev/null 2>&1; then + if command -v apt-get >/dev/null 2>&1; then + sudo apt-get update -y + sudo apt-get install -y docker.io + elif command -v yum >/dev/null 2>&1; then + sudo yum install -y docker + else + echo "opengpdb/install: install docker manually first" >&2 + exit 1 + fi +fi +sudo systemctl start docker 2>/dev/null || sudo service docker start || true + +# 2. Skip if the container is already created and the cluster is initialized. +if sudo docker container inspect "$NAME" >/dev/null 2>&1; then + if sudo docker exec "$NAME" test -d /data0/coordinator/gpseg-1 2>/dev/null; then + echo "opengpdb: container '$NAME' already initialized; nothing to do" >&2 + exit 0 + fi +fi + +sudo docker pull "$IMAGE" + +# 3. Long-lived privileged container. Bridge mode + --hostname localhost so +# gpinitsystem's ssh-to-localhost lands on the container's own sshd. +if ! sudo docker container inspect "$NAME" >/dev/null 2>&1; then + sudo docker run -d --privileged --hostname localhost -p 5432:5432 \ + --name "$NAME" "$IMAGE" sleep infinity +fi + +# 4. Inside the container: install the .deb, kernel tuning, sshd setup. +# The .deb's preinst creates the gpadmin user; we still need passwordless +# ssh and the kernel knobs gpinitsystem checks. 
+sudo docker exec -i "$NAME" bash -s </dev/null +sysctl -w kernel.shmmax=\$SHMAX >/dev/null +sysctl -w kernel.shmmni=4096 >/dev/null +sysctl -w vm.overcommit_memory=2 >/dev/null +sysctl -w vm.overcommit_ratio=95 >/dev/null +sysctl -w net.ipv4.ip_local_port_range="10000 65535" >/dev/null +sysctl -w kernel.sem="250 2048000 200 8192" >/dev/null + +cat > /etc/security/limits.d/99-gpadmin.conf </dev/null 2>&1 || useradd gpadmin -m -g gpadmin -s /bin/bash +chsh -s /bin/bash gpadmin || true +echo "gpadmin ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/gpadmin +sudo -u gpadmin bash -c ' + set -e + mkdir -p ~/.ssh + chmod 700 ~/.ssh + [ -f ~/.ssh/id_rsa ] || ssh-keygen -t rsa -b 4096 -N "" -f ~/.ssh/id_rsa + cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys + chmod 600 ~/.ssh/authorized_keys +' + +# /bin/ping in this image has no caps. gpinitsystem pings localhost. +setcap cap_net_raw+ep /bin/ping +[ -e /bin/ping6 ] && setcap cap_net_raw+ep /bin/ping6 || true + +# sshd setup. +ssh-keygen -A +mkdir -p /run/sshd +echo "PasswordAuthentication no" >> /etc/ssh/sshd_config +echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config +/usr/sbin/sshd + +# Data dirs and gpadmin shell env. +mkdir -p /data0/primary /data0/coordinator +chown -R gpadmin:gpadmin /data0 +grep -q greenplum_path /home/gpadmin/.bashrc 2>/dev/null || \ + echo "source /opt/greenplum-db-6/greenplum_path.sh" >> /home/gpadmin/.bashrc +grep -q COORDINATOR_DATA_DIRECTORY /home/gpadmin/.bashrc 2>/dev/null || \ + echo "export COORDINATOR_DATA_DIRECTORY=/data0/coordinator/gpseg-1" >> /home/gpadmin/.bashrc + +echo localhost > /home/gpadmin/hosts +chown gpadmin:gpadmin /home/gpadmin/hosts +EOF + +# 5. Drop gpinitsystem_config in and run gpinitsystem. 
+sudo docker cp "$(dirname "$0")/gpinitsystem_config" \ + "$NAME":/home/gpadmin/gpinitsystem_config +sudo docker exec "$NAME" chown gpadmin:gpadmin /home/gpadmin/gpinitsystem_config + +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \ + 'gpinitsystem -a -c gpinitsystem_config -h hosts' diff --git a/opengpdb/load b/opengpdb/load new file mode 100755 index 0000000000..d3d38b2b00 --- /dev/null +++ b/opengpdb/load @@ -0,0 +1,26 @@ +#!/bin/bash +set -eu + +NAME=clickbench-opengpdb + +# Stream hits.tsv and create.sql into the container via tar so host-side +# symlinks are dereferenced (docker cp would pass them through). +tar -ch hits.tsv create.sql | \ + sudo docker exec -i "$NAME" tar -xC /home/gpadmin +sudo docker exec "$NAME" chown gpadmin:gpadmin \ + /home/gpadmin/hits.tsv /home/gpadmin/create.sql + +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \ + 'psql -d postgres -v ON_ERROR_STOP=1 -f /home/gpadmin/create.sql' + +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \ + 'pgrep -u gpadmin gpfdist || nohup gpfdist -d /home/gpadmin >/tmp/gpfdist.log 2>&1 &' + +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \ + 'psql -d postgres -v ON_ERROR_STOP=1 -t -c "INSERT INTO hits SELECT * FROM hits_ext;"' +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \ + 'psql -d postgres -v ON_ERROR_STOP=1 -t -c "ANALYZE hits;"' + +sudo docker exec "$NAME" rm -f /home/gpadmin/hits.tsv +rm -f hits.tsv +sync diff --git a/opengpdb/queries.sql b/opengpdb/queries.sql new file mode 100644 index 0000000000..31f65fc898 --- /dev/null +++ b/opengpdb/queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; +SELECT AVG(UserID) FROM hits; +SELECT COUNT(DISTINCT UserID) FROM hits; +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; +SELECT MIN(EventDate), MAX(EventDate) FROM hits; +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY 
COUNT(*) DESC; +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT 
SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), 
SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= 
'2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/opengpdb/query b/opengpdb/query new file mode 
100755 index 0000000000..6cba0197b5 --- /dev/null +++ b/opengpdb/query @@ -0,0 +1,30 @@ +#!/bin/bash +# Reads a SQL query from stdin, runs it via psql against the postgres DB +# inside the clickbench-opengpdb container as gpadmin. +# Stdout: query result. +# Stderr: query runtime in fractional seconds on the last line. +# Exit non-zero on error. +set -e + +NAME=clickbench-opengpdb +query=$(cat) + +out=$(printf '\\timing\n%s\n' "$query" | \ + sudo docker exec -i "$NAME" sudo -iu gpadmin bash -lc 'psql -d postgres -t' 2>&1) +status=$? + +if printf '%s\n' "$out" | grep -q '^ERROR\|psql: error'; then + printf '%s\n' "$out" >&2 + exit 1 +fi + +printf '%s\n' "$out" | grep -v '^Time:' + +time_ms=$(printf '%s\n' "$out" | grep -oP 'Time:\s+\K[0-9]+\.[0-9]+' | tail -n1) +if [ -z "$time_ms" ]; then + echo "no timing in psql output" >&2 + exit 1 +fi +awk -v ms="$time_ms" 'BEGIN { printf "%.3f\n", ms / 1000 }' >&2 + +exit "$status" diff --git a/opengpdb/start b/opengpdb/start new file mode 100755 index 0000000000..6af223a565 --- /dev/null +++ b/opengpdb/start @@ -0,0 +1,9 @@ +#!/bin/bash +set -eu + +NAME=clickbench-opengpdb + +sudo docker start "$NAME" >/dev/null + +sudo docker exec "$NAME" bash -c 'pgrep sshd >/dev/null || /usr/sbin/sshd' +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc 'gpstart -a' || true diff --git a/opengpdb/stop b/opengpdb/stop new file mode 100755 index 0000000000..6c9b9df52b --- /dev/null +++ b/opengpdb/stop @@ -0,0 +1,6 @@ +#!/bin/bash + +NAME=clickbench-opengpdb + +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc 'gpstop -a' 2>/dev/null || true +sudo docker stop "$NAME" >/dev/null 2>&1 || true diff --git a/opengpdb/template.json b/opengpdb/template.json new file mode 100644 index 0000000000..65b0c379e7 --- /dev/null +++ b/opengpdb/template.json @@ -0,0 +1,12 @@ +{ + "system": "OpenGPDB", + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": [ + "C", + "column-oriented", + "PostgreSQL compatible", + "lukewarm-cold-run" + ] +} diff 
--git a/warehousepg/README.md b/warehousepg/README.md new file mode 100644 index 0000000000..884d624e2b --- /dev/null +++ b/warehousepg/README.md @@ -0,0 +1,18 @@ +WarehousePG (WHPG) is an open-source Greenplum 7 fork, picking up the +Greenplum OSS project after it went closed source in 2024. +https://github.com/warehouse-pg/warehouse-pg + +The benchmark runs WarehousePG inside a privileged Rocky 9 docker +container, so the host can be any distro with docker. WarehousePG ships +no prebuilt binaries; the install script builds it from source inside +the container (~10–20 min on first install) and initializes a +single-host, 14-segment cluster via gpinitsystem. + +The other scripts (start/stop/check/load/query) `docker exec` into the +running container. + +To run the test: + +``` +./benchmark.sh +``` diff --git a/warehousepg/benchmark.sh b/warehousepg/benchmark.sh new file mode 100755 index 0000000000..432bd3ae17 --- /dev/null +++ b/warehousepg/benchmark.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Thin shim — actual flow is in lib/benchmark-common.sh. 
+export BENCH_DOWNLOAD_SCRIPT="download-hits-tsv" +export BENCH_DURABLE=yes +exec ../lib/benchmark-common.sh diff --git a/warehousepg/check b/warehousepg/check new file mode 100755 index 0000000000..ca20891967 --- /dev/null +++ b/warehousepg/check @@ -0,0 +1,7 @@ +#!/bin/bash +set -e + +NAME=clickbench-warehousepg + +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \ + 'psql -d postgres -t -c "SELECT 1"' >/dev/null diff --git a/warehousepg/create.sql b/warehousepg/create.sql new file mode 100644 index 0000000000..a2ae8a7eea --- /dev/null +++ b/warehousepg/create.sql @@ -0,0 +1,116 @@ +drop table if exists hits; +CREATE TABLE hits +( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT NOT NULL, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT NOT NULL, + Referer TEXT NOT NULL, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT NOT NULL, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel TEXT NOT NULL, + Params TEXT NOT NULL, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT NOT NULL, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT 
NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset TEXT NOT NULL, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT NOT NULL, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT NOT NULL, + BrowserCountry TEXT NOT NULL, + SocialNetwork TEXT NOT NULL, + SocialAction TEXT NOT NULL, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT NOT NULL, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT NOT NULL, + ParamCurrency TEXT NOT NULL, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT NOT NULL, + OpenstatCampaignID TEXT NOT NULL, + OpenstatAdID TEXT NOT NULL, + OpenstatSourceID TEXT NOT NULL, + UTMSource TEXT NOT NULL, + UTMMedium TEXT NOT NULL, + UTMCampaign TEXT NOT NULL, + UTMContent TEXT NOT NULL, + UTMTerm TEXT NOT NULL, + FromTag TEXT NOT NULL, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL +) +with 
(appendoptimized=true,orientation=column,compresstype=zstd) +DISTRIBUTED BY (UserID); +CREATE INDEX hits_idx on hits using btree (CounterID, EventDate, UserID, EventTime, WatchID); +drop external table if exists hits_ext; +CREATE EXTERNAL TABLE hits_ext (like hits) +LOCATION ('gpfdist://localhost:8080/hits.tsv') +FORMAT 'TEXT'; diff --git a/warehousepg/data-size b/warehousepg/data-size new file mode 100755 index 0000000000..7b3cfecb27 --- /dev/null +++ b/warehousepg/data-size @@ -0,0 +1,9 @@ +#!/bin/bash +# The cluster's data lives inside the container under /data0, not on the +# host. Run du inside the container so we see the actual bytes. +set -eu + +NAME=clickbench-warehousepg + +sudo docker exec "$NAME" du -bcs /data0 2>/dev/null | \ + awk '/total$/ { print $1 }' diff --git a/warehousepg/gpinitsystem_config b/warehousepg/gpinitsystem_config new file mode 100644 index 0000000000..0c5a4701d7 --- /dev/null +++ b/warehousepg/gpinitsystem_config @@ -0,0 +1,64 @@ +# FILE NAME: gpinitsystem_config + +# Configuration file needed by the gpinitsystem + +################################################ +#### REQUIRED PARAMETERS +################################################ + +#### Naming convention for utility-generated data directories. +SEG_PREFIX=gpseg + +#### Base number by which primary segment port numbers +#### are calculated. +PORT_BASE=6000 + +#### File system location(s) where primary segment data directories +#### will be created. The number of locations in the list dictate +#### the number of primary segments that will get created per +#### physical host (if multiple addresses for a host are listed in +#### the hostfile, the number of segments will be spread evenly across +#### the specified interface addresses). 
+declare -a DATA_DIRECTORY=(/data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary) + +#### OS-configured hostname or IP address of the coordinator host. +COORDINATOR_HOSTNAME=localhost + +#### File system location where the coordinator data directory +#### will be created. +COORDINATOR_DIRECTORY=/data0/coordinator + +#### Port number for the coordinator instance. +COORDINATOR_PORT=5432 + +#### Shell utility used to connect to remote hosts. +TRUSTED_SHELL=ssh + +#### Default server-side character set encoding. +ENCODING=UNICODE + +################################################ +#### OPTIONAL MIRROR PARAMETERS +################################################ + +#### Base number by which mirror segment port numbers +#### are calculated. +#MIRROR_PORT_BASE=7000 + +#### File system location(s) where mirror segment data directories +#### will be created. The number of mirror locations must equal the +#### number of primary locations as specified in the +#### DATA_DIRECTORY parameter. +#declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror /data1/mirror /data2/mirror /data2/mirror /data2/mirror) + + +################################################ +#### OTHER OPTIONAL PARAMETERS +################################################ + +#### Create a database of this name after initialization. +#DATABASE_NAME=name_of_database + +#### Specify the location of the host address file here instead of +#### with the -h option of gpinitsystem. +#MACHINE_LIST_FILE=/home/gpadmin/gpconfigs/hostfile_gpinitsystem diff --git a/warehousepg/install b/warehousepg/install new file mode 100755 index 0000000000..69c7230d7a --- /dev/null +++ b/warehousepg/install @@ -0,0 +1,156 @@ +#!/bin/bash +# WarehousePG (a Greenplum 7 fork) only documents a RHEL/Rocky build path +# and ships no prebuilt binaries. 
To make the benchmark portable, run +# everything inside a Rocky 9 container; the other scripts (start/stop/ +# check/load/query) docker-exec into this container. +# +# Idempotent: re-running this script after a successful install does nothing. +set -eu + +NAME=clickbench-warehousepg +IMAGE=rockylinux:9 +WHPG_VERSION=7.4.0-WHPG + +# 1. Make sure docker is installed and the daemon is up. +if ! command -v docker >/dev/null 2>&1; then + if command -v apt-get >/dev/null 2>&1; then + sudo apt-get update -y + sudo apt-get install -y docker.io + elif command -v yum >/dev/null 2>&1; then + sudo yum install -y docker + else + echo "warehousepg/install: install docker manually first" >&2 + exit 1 + fi +fi +sudo systemctl start docker 2>/dev/null || sudo service docker start || true + +# 2. Skip if the container is already created and the cluster is initialized. +if sudo docker container inspect "$NAME" >/dev/null 2>&1; then + if sudo docker exec "$NAME" test -d /data0/coordinator/gpseg-1 2>/dev/null; then + echo "warehousepg: container '$NAME' already initialized; nothing to do" >&2 + exit 0 + fi +fi + +sudo docker pull "$IMAGE" + +# 3. Long-lived privileged container. Bridge mode + --hostname localhost so +# gpinitsystem's ssh-to-localhost lands on the container's own sshd. +if ! sudo docker container inspect "$NAME" >/dev/null 2>&1; then + sudo docker run -d --privileged --hostname localhost -p 5432:5432 \ + --name "$NAME" "$IMAGE" sleep infinity +fi + +# 4. Inside the container: install build deps, kernel tuning, gpadmin +# user, sshd, build WarehousePG from source, init a single-host +# 14-segment cluster. The dep list mirrors WHPG's own README.RHEL-Rocky.bash.
+sudo docker exec -i "$NAME" bash -s </dev/null +sysctl -w kernel.shmmax=\$SHMAX >/dev/null +sysctl -w kernel.shmmni=4096 >/dev/null +sysctl -w vm.overcommit_memory=2 >/dev/null +sysctl -w vm.overcommit_ratio=95 >/dev/null +sysctl -w net.ipv4.ip_local_port_range="10000 65535" >/dev/null +sysctl -w kernel.sem="250 2048000 200 8192" >/dev/null + +cat > /etc/security/limits.d/99-gpadmin.conf </dev/null 2>&1 || useradd gpadmin -m -g gpadmin +echo "gpadmin ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/gpadmin +sudo -u gpadmin bash -c ' + set -e + mkdir -p ~/.ssh + chmod 700 ~/.ssh + [ -f ~/.ssh/id_rsa ] || ssh-keygen -t rsa -b 4096 -N "" -f ~/.ssh/id_rsa + cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys + chmod 600 ~/.ssh/authorized_keys +' + +# /bin/ping in rockylinux:9 has no caps; gpinitsystem pings localhost. +setcap cap_net_raw+ep /bin/ping +[ -e /bin/ping6 ] && setcap cap_net_raw+ep /bin/ping6 || true + +# sshd setup. +ssh-keygen -A +mkdir -p /run/sshd +echo "PasswordAuthentication no" >> /etc/ssh/sshd_config +echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config +/usr/sbin/sshd + +# Build WarehousePG from source if not already built. +if [ ! -x /usr/local/whpg/bin/postgres ]; then + cd /tmp + if [ ! -f whpg-${WHPG_VERSION}.tar.gz ]; then + curl -fsSL -o whpg-${WHPG_VERSION}.tar.gz \ + https://github.com/warehouse-pg/warehouse-pg/archive/refs/tags/${WHPG_VERSION}.tar.gz + fi + rm -rf warehouse-pg-${WHPG_VERSION} + tar -xzf whpg-${WHPG_VERSION}.tar.gz + cd warehouse-pg-${WHPG_VERSION} + echo -e "/usr/local/lib\n/usr/local/lib64" >> /etc/ld.so.conf + ldconfig + ./configure --with-perl --with-python --with-libxml --with-gssapi \ + --prefix=/usr/local/whpg + # Cap parallelism — the link phase eats several GB per worker. + JOBS=\$(nproc); JOBS=\$(( JOBS > 16 ? 16 : JOBS )) + make -j\$JOBS + make install + chown -R gpadmin:gpadmin /usr/local/whpg +fi + +# gpstart/gpstop import the python pgdb (PygreSQL) module. 
Build it +# against whpg's pg_config so the right libpq is linked. +source /usr/local/whpg/greenplum_path.sh +pip3 install --quiet 'PygreSQL==5.2.5' psutil + +# Data dirs and gpadmin shell env. +mkdir -p /data0/primary /data0/coordinator +chown -R gpadmin:gpadmin /data0 +grep -q greenplum_path /home/gpadmin/.bashrc 2>/dev/null || \ + echo "source /usr/local/whpg/greenplum_path.sh" >> /home/gpadmin/.bashrc +grep -q COORDINATOR_DATA_DIRECTORY /home/gpadmin/.bashrc 2>/dev/null || \ + echo "export COORDINATOR_DATA_DIRECTORY=/data0/coordinator/gpseg-1" >> /home/gpadmin/.bashrc + +echo localhost > /home/gpadmin/hosts +chown gpadmin:gpadmin /home/gpadmin/hosts +EOF + +# 5. Drop gpinitsystem_config in and run gpinitsystem. +sudo docker cp "$(dirname "$0")/gpinitsystem_config" \ + "$NAME":/home/gpadmin/gpinitsystem_config +sudo docker exec "$NAME" chown gpadmin:gpadmin /home/gpadmin/gpinitsystem_config + +sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \ + 'gpinitsystem -a -c gpinitsystem_config -h hosts' diff --git a/warehousepg/load b/warehousepg/load new file mode 100755 index 0000000000..3d6b5c90e3 --- /dev/null +++ b/warehousepg/load @@ -0,0 +1,26 @@ +#!/bin/bash +set -eu + +NAME=clickbench-warehousepg + +# Stream hits.tsv and create.sql into the container via tar so host-side +# symlinks are dereferenced (docker cp would pass them through). 
+tar -ch hits.tsv create.sql | \
+    sudo docker exec -i "$NAME" tar -xC /home/gpadmin
+sudo docker exec "$NAME" chown gpadmin:gpadmin \
+    /home/gpadmin/hits.tsv /home/gpadmin/create.sql
+
+sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \
+    'psql -d postgres -v ON_ERROR_STOP=1 -f /home/gpadmin/create.sql'
+
+# Start gpfdist if it is not already running, then wait (up to 30s) for it
+# to accept connections before the INSERT below touches the external table;
+# otherwise the load can race the daemon and fail with "connection refused".
+# Assumes the gpfdist default port 8080 -- confirm against create.sql.
+sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \
+    'pgrep -u gpadmin gpfdist || nohup gpfdist -d /home/gpadmin >/tmp/gpfdist.log 2>&1 & for i in $(seq 1 30); do (: </dev/tcp/localhost/8080) 2>/dev/null && break; sleep 1; done'
+
+sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \
+    'psql -d postgres -v ON_ERROR_STOP=1 -t -c "INSERT INTO hits SELECT * FROM hits_ext;"'
+sudo docker exec "$NAME" sudo -iu gpadmin bash -lc \
+    'psql -d postgres -v ON_ERROR_STOP=1 -t -c "ANALYZE hits;"'
+
+# Reclaim disk: drop the TSV inside the container and on the host.
+sudo docker exec "$NAME" rm -f /home/gpadmin/hits.tsv
+rm -f hits.tsv
+sync
diff --git a/warehousepg/queries.sql b/warehousepg/queries.sql
new file mode 100644
index 0000000000..31f65fc898
--- /dev/null
+++ b/warehousepg/queries.sql
@@ -0,0 +1,43 @@
+SELECT COUNT(*) FROM hits;
+SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0;
+SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits;
+SELECT AVG(UserID) FROM hits;
+SELECT COUNT(DISTINCT UserID) FROM hits;
+SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
+SELECT MIN(EventDate), MAX(EventDate) FROM hits;
+SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;
+SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
+SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
+SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
+SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
+SELECT SearchPhrase, COUNT(*) AS c FROM
hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l 
DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth 
+ 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT TraficSourceID, 
SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
+SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
+SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
+SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000;
diff --git a/warehousepg/query b/warehousepg/query
new file mode 100755
index 0000000000..34f359227d
--- /dev/null
+++ b/warehousepg/query
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Reads a SQL query from stdin, runs it via psql against the postgres DB
+# inside the clickbench-warehousepg container as gpadmin.
+# Stdout: query result.
+# Stderr: query runtime in fractional seconds on the last line.
+# Exit non-zero on error.
+set -e
+
+NAME=clickbench-warehousepg
+query=$(cat)
+
+# `|| status=$?` keeps `set -e` from aborting the script when psql fails,
+# so the error branch below can surface the output before exiting non-zero.
+# (A bare `status=$?` after the substitution would never run on failure.)
+status=0
+out=$(printf '\\timing\n%s\n' "$query" | \
+    sudo docker exec -i "$NAME" sudo -iu gpadmin bash -lc 'psql -d postgres -t' 2>&1) || status=$?
+
+if [ "$status" -ne 0 ] || printf '%s\n' "$out" | grep -q '^ERROR\|psql: error'; then
+    printf '%s\n' "$out" >&2
+    exit 1
+fi
+
+printf '%s\n' "$out" | grep -v '^Time:'
+
+# \timing prints "Time: <ms> ms"; take the last one (the query itself).
+time_ms=$(printf '%s\n' "$out" | grep -oP 'Time:\s+\K[0-9]+\.[0-9]+' | tail -n1)
+if [ -z "$time_ms" ]; then
+    echo "no timing in psql output" >&2
+    exit 1
+fi
+awk -v ms="$time_ms" 'BEGIN { printf "%.3f\n", ms / 1000 }' >&2
+
+exit 0
diff --git a/warehousepg/start b/warehousepg/start
new file mode 100755
index 0000000000..02415d5e4d
--- /dev/null
+++ b/warehousepg/start
@@ -0,0 +1,9 @@
+#!/bin/bash
+set -eu
+
+NAME=clickbench-warehousepg
+
+sudo docker start "$NAME" >/dev/null
+
+sudo docker exec "$NAME" bash -c 'pgrep sshd >/dev/null || /usr/sbin/sshd'
+sudo docker exec "$NAME" sudo -iu gpadmin bash -lc 'gpstart -a' || true
diff --git a/warehousepg/stop b/warehousepg/stop
new file mode 100755
index 0000000000..90b461c53a
--- /dev/null
+++ b/warehousepg/stop
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+NAME=clickbench-warehousepg
+
+sudo docker exec "$NAME" sudo -iu gpadmin bash -lc 'gpstop -a' 2>/dev/null || true
+sudo docker stop "$NAME" >/dev/null 2>&1 || true
diff --git a/warehousepg/template.json b/warehousepg/template.json
new file mode 100644
index 0000000000..6fb029b704
--- /dev/null
+++ b/warehousepg/template.json
@@ -0,0 +1,12 @@
+{
+    "system": "WarehousePG",
+    "proprietary": "no",
+    "hardware": "cpu",
+    "tuned": "no",
+    "tags": [
+        "C",
+        "column-oriented",
+        "PostgreSQL compatible",
+        "lukewarm-cold-run"
+    ]
+}