From 2d49cf5e3dfcf96c16e1146aa9b2f9eded2295cf Mon Sep 17 00:00:00 2001 From: Yifei-yang7 Date: Tue, 12 Aug 2025 17:35:56 +0000 Subject: [PATCH 1/8] Add Sirius ClickBench scripts. --- sirius/benchmark.sh | 52 +++++++++++++++++ sirius/check_libcudf_env.sh | 36 ++++++++++++ sirius/create.sql | 108 ++++++++++++++++++++++++++++++++++++ sirius/queries.sql | 43 ++++++++++++++ sirius/run.sh | 23 ++++++++ 5 files changed, 262 insertions(+) create mode 100755 sirius/benchmark.sh create mode 100755 sirius/check_libcudf_env.sh create mode 100644 sirius/create.sql create mode 100644 sirius/queries.sql create mode 100755 sirius/run.sh diff --git a/sirius/benchmark.sh b/sirius/benchmark.sh new file mode 100755 index 000000000..8966ff8a8 --- /dev/null +++ b/sirius/benchmark.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# Install +sudo apt-get update -y +sudo apt-get install -y git g++ cmake ninja-build libssl-dev build-essential make ccache pip + +# Check libcudf environment +./check_libcudf_env.sh +if [[ $? -ne 0 ]]; then + echo "libcudf environment check failed. Exiting." + exit 1 +fi + +# Build Sirius +git clone --recurse-submodules https://github.com/sirius-db/sirius.git +cd sirius +export SIRIUS_HOME_PATH=`pwd` +cd duckdb +mkdir -p extension_external && cd extension_external +git clone https://github.com/duckdb/substrait.git +cd substrait +git reset --hard ec9f8725df7aa22bae7217ece2f221ac37563da4 #go to the right commit hash for duckdb substrait extension +cd $SIRIUS_HOME_PATH +make -j$(nproc) +export PATH="$PATH:`pwd`/build/release/" +cd .. 
+ +# Load the data +sudo apt-get install -y pigz +wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' +pigz -d -f hits.tsv.gz + +echo -n "Load time: " +command time -f '%e' duckdb hits.db -f create.sql -c "COPY hits FROM 'hits.tsv' (QUOTE '')" + +# Run the queries + +./run.sh 2>&1 | tee log.txt + +echo -n "Data size: " +wc -c hits.db + +cat log.txt | + grep -P '^\d|Killed|Segmentation|^Run Time \(s\): real' | + sed -r -e 's/^.*(Killed|Segmentation).*$/null\nnull\nnull/; s/^Run Time \(s\): real\s*([0-9.]+).*$/\1/' | + awk '{ + buf[i++] = $1 + if (i == 4) { + printf "[%s,%s,%s],\n", buf[1], buf[2], buf[3] + i = 0 + } + }' diff --git a/sirius/check_libcudf_env.sh b/sirius/check_libcudf_env.sh new file mode 100755 index 000000000..4fe01fe05 --- /dev/null +++ b/sirius/check_libcudf_env.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +if [[ -n "$CONDA_PREFIX" ]]; then + echo "You are in a Conda environment: $CONDA_DEFAULT_ENV" + + # Check if libcudf is installed + if conda list | grep -q "^libcudf"; then + echo "libcudf is installed in this environment." + else + echo "libcudf is NOT installed in this environment." + exit 1 + fi +else + echo "You are NOT in a Conda environment." + exit 1 +fi + +# Check if LIBCUDF_ENV_PREFIX is set +if [[ -z "$LIBCUDF_ENV_PREFIX" ]]; then + echo "LIBCUDF_ENV_PREFIX is not set." + exit 1 +fi + +# Check if it's a valid conda environment (conda-meta folder exists) +if [[ ! 
-d "$LIBCUDF_ENV_PREFIX/conda-meta" ]]; then + echo "LIBCUDF_ENV_PREFIX does not point to a valid Conda environment: $LIBCUDF_ENV_PREFIX" + exit 1 +fi + +# Use conda to list packages in that environment and grep for libcudf +if conda list --prefix "$LIBCUDF_ENV_PREFIX" | grep -q "^libcudf"; then + echo "libcudf is installed in the Conda environment at $LIBCUDF_ENV_PREFIX" +else + echo "libcudf is NOT installed in the Conda environment at $LIBCUDF_ENV_PREFIX" + exit 1 +fi \ No newline at end of file diff --git a/sirius/create.sql b/sirius/create.sql new file mode 100644 index 000000000..4d23eaac6 --- /dev/null +++ b/sirius/create.sql @@ -0,0 +1,108 @@ +CREATE TABLE hits +( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT, + Referer TEXT, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel TEXT, + Params TEXT, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + 
WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset TEXT, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT, + BrowserCountry TEXT, + SocialNetwork TEXT, + SocialAction TEXT, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT, + ParamCurrency TEXT, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT, + OpenstatCampaignID TEXT, + OpenstatAdID TEXT, + OpenstatSourceID TEXT, + UTMSource TEXT, + UTMMedium TEXT, + UTMCampaign TEXT, + UTMContent TEXT, + UTMTerm TEXT, + FromTag TEXT, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL +); diff --git a/sirius/queries.sql b/sirius/queries.sql new file mode 100644 index 000000000..b4115ee3a --- /dev/null +++ b/sirius/queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; 
+SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; +SELECT AVG(UserID) FROM hits; +SELECT COUNT(DISTINCT UserID) FROM hits; +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; +SELECT MIN(EventDate), MAX(EventDate) FROM hits; +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' 
AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(STRLEN(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(STRLEN(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), 
SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits 
GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 
10000; +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/sirius/run.sh b/sirius/run.sh new file mode 100755 index 000000000..a06fe4c78 --- /dev/null +++ b/sirius/run.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +TRIES=3 +GPU_CACHING_SIZE='40 GB' +GPU_PROCESSING_SIZE='40 GB' + +cat queries.sql | while read -r query; do + sync + echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null + + echo "$query"; + cli_params=() + cli_params+=("-c") + cli_params+=(".timer on") + cli_params+=("-c") + cli_params+=("call gpu_buffer_init(\"${GPU_CACHING_SIZE}\", \"${GPU_PROCESSING_SIZE}\");") + for i in $(seq 1 $TRIES); do + cli_params+=("-c") + cli_params+=("call gpu_processing(\"${query}\");") + done; + echo "${cli_params[@]}" + duckdb hits.db "${cli_params[@]}" +done; From 62218eeb108087eedfb9a6a8014327707ff9fef8 Mon Sep 17 00:00:00 2001 From: Yifei-yang7 Date: Tue, 28 Oct 2025 04:31:56 +0000 Subject: [PATCH 2/8] Update running scripts. --- sirius/benchmark.sh | 26 +++--------- sirius/check_libcudf_env.sh | 36 ----------------- sirius/dependencies.sh | 80 +++++++++++++++++++++++++++++++++++++ sirius/load.sql | 7 ++++ sirius/run.sh | 5 ++- 5 files changed, 95 insertions(+), 59 deletions(-) delete mode 100755 sirius/check_libcudf_env.sh create mode 100755 sirius/dependencies.sh create mode 100644 sirius/load.sql diff --git a/sirius/benchmark.sh b/sirius/benchmark.sh index 8966ff8a8..ff7b53004 100755 --- a/sirius/benchmark.sh +++ b/sirius/benchmark.sh @@ -1,37 +1,21 @@ #!/bin/bash -# Install -sudo apt-get update -y -sudo apt-get install -y git g++ cmake ninja-build libssl-dev build-essential make ccache pip - -# Check libcudf environment -./check_libcudf_env.sh -if [[ $? -ne 0 ]]; then - echo "libcudf environment check failed. 
Exiting." - exit 1 -fi +# Install dependencies +source dependencies.sh # Build Sirius git clone --recurse-submodules https://github.com/sirius-db/sirius.git cd sirius -export SIRIUS_HOME_PATH=`pwd` -cd duckdb -mkdir -p extension_external && cd extension_external -git clone https://github.com/duckdb/substrait.git -cd substrait -git reset --hard ec9f8725df7aa22bae7217ece2f221ac37563da4 #go to the right commit hash for duckdb substrait extension -cd $SIRIUS_HOME_PATH +source setup_sirius.sh make -j$(nproc) export PATH="$PATH:`pwd`/build/release/" cd .. # Load the data -sudo apt-get install -y pigz -wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -pigz -d -f hits.tsv.gz +wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.parquet' echo -n "Load time: " -command time -f '%e' duckdb hits.db -f create.sql -c "COPY hits FROM 'hits.tsv' (QUOTE '')" +command time -f '%e' duckdb hits.db -f create.sql -f load.sql # Run the queries diff --git a/sirius/check_libcudf_env.sh b/sirius/check_libcudf_env.sh deleted file mode 100755 index 4fe01fe05..000000000 --- a/sirius/check_libcudf_env.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -if [[ -n "$CONDA_PREFIX" ]]; then - echo "You are in a Conda environment: $CONDA_DEFAULT_ENV" - - # Check if libcudf is installed - if conda list | grep -q "^libcudf"; then - echo "libcudf is installed in this environment." - else - echo "libcudf is NOT installed in this environment." - exit 1 - fi -else - echo "You are NOT in a Conda environment." - exit 1 -fi - -# Check if LIBCUDF_ENV_PREFIX is set -if [[ -z "$LIBCUDF_ENV_PREFIX" ]]; then - echo "LIBCUDF_ENV_PREFIX is not set." - exit 1 -fi - -# Check if it's a valid conda environment (conda-meta folder exists) -if [[ ! 
-d "$LIBCUDF_ENV_PREFIX/conda-meta" ]]; then - echo "LIBCUDF_ENV_PREFIX does not point to a valid Conda environment: $LIBCUDF_ENV_PREFIX" - exit 1 -fi - -# Use conda to list packages in that environment and grep for libcudf -if conda list --prefix "$LIBCUDF_ENV_PREFIX" | grep -q "^libcudf"; then - echo "libcudf is installed in the Conda environment at $LIBCUDF_ENV_PREFIX" -else - echo "libcudf is NOT installed in the Conda environment at $LIBCUDF_ENV_PREFIX" - exit 1 -fi \ No newline at end of file diff --git a/sirius/dependencies.sh b/sirius/dependencies.sh new file mode 100755 index 000000000..eef2f1062 --- /dev/null +++ b/sirius/dependencies.sh @@ -0,0 +1,80 @@ +#!/bin/bash +echo "Checking dependencies..." + +# DuckDB dependencies +sudo apt-get update -y +sudo apt-get install -y git g++ ninja-build libssl-dev build-essential make ccache pip + +# CMake +CMAKE_REQUIRED_VERSION="3.30.4" +if command -v cmake &>/dev/null; then + CMAKE_INSTALLED_VERSION=$(cmake --version | head -n1 | awk '{print $3}') +else + CMAKE_INSTALLED_VERSION="0" +fi + +version_lt() { + [ "$(printf '%s\n%s\n' "$1" "$2" | sort -V | head -n1)" != "$2" ] +} + +if version_lt "$CMAKE_INSTALLED_VERSION" "$CMAKE_REQUIRED_VERSION"; then + echo "CMake version too old, installing version $CMAKE_REQUIRED_VERSION..." + sudo apt purge --autoremove -y cmake + wget -qO- "https://github.com/Kitware/CMake/releases/download/v$CMAKE_REQUIRED_VERSION/cmake-$CMAKE_REQUIRED_VERSION.tar.gz" | tar -xz + cd "cmake-$CMAKE_REQUIRED_VERSION" + ./bootstrap + make -j$(nproc) + sudo make install + cd .. + rm -rf "cmake-$CMAKE_REQUIRED_VERSION" +fi + +# CONDA +MINICONDA_DIR="$HOME/miniconda3" +if [[ ! -d "$MINICONDA_DIR" ]]; then + echo "Miniconda not found. Installing..." 
+    ARCH=$(uname -m)  # pick the Miniconda installer that matches the host CPU architecture
+    if [[ "$ARCH" == "x86_64" ]]; then
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"
+    elif [[ "$ARCH" == "aarch64" ]]; then
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh"
+    else
+        echo "Unsupported architecture for Miniconda: $ARCH"
+        exit 1  # NOTE: when this file is sourced, this terminates the sourcing shell as well
+    fi
+    wget -q "$MINICONDA_URL" -O /tmp/Miniconda3-latest.sh
+    bash /tmp/Miniconda3-latest.sh -b -p "$MINICONDA_DIR"  # -b: batch (no prompts), -p: install prefix
+    rm /tmp/Miniconda3-latest.sh
+fi
+source "$MINICONDA_DIR/etc/profile.d/conda.sh"  # makes the `conda` shell function (incl. `conda activate`) available here
+
+# Accept the Anaconda channel ToS up front so the conda commands below can run without prompting
+conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main
+conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
+
+# CUDF: libcudf is kept in its own conda env; LIBCUDF_ENV_PREFIX is exported for later steps (presumably the Sirius build - confirm)
+CUDF_REQUIRED_VERSION="25.12"  # compared as a release prefix of the installed version (e.g. 25.12.00...)
+CUDF_NIGHTLY=true
+CONDA_CUDF_ENV_NAME="libcudf-env"
+CONDA_BASE=$(conda info --base)
+export LIBCUDF_ENV_PREFIX="$CONDA_BASE/envs/$CONDA_CUDF_ENV_NAME"
+
+check_libcudf_version() {  # prints the version of the package named exactly "libcudf" in env prefix $1 (nothing if absent)
+    conda list --prefix "$1" 2>/dev/null | awk '$1 == "libcudf" {print $2; exit}'
+}
+CUDF_INSTALLED_VERSION=$(check_libcudf_version "$LIBCUDF_ENV_PREFIX" || echo "")
+
+if [[ ! -d "$LIBCUDF_ENV_PREFIX" ]]; then
+    echo "Creating conda environment $CONDA_CUDF_ENV_NAME..."
+    conda create --name "$CONDA_CUDF_ENV_NAME" -y
+fi
+conda activate "$CONDA_CUDF_ENV_NAME"
+
+if [[ "$CUDF_INSTALLED_VERSION" != "$CUDF_REQUIRED_VERSION" && "$CUDF_INSTALLED_VERSION" != "$CUDF_REQUIRED_VERSION".* ]]; then  # skip reinstall when the required release is already present
+    echo "Installing/upgrading libcudf $CUDF_REQUIRED_VERSION..."
+ if [[ "$CUDF_NIGHTLY" = true ]]; then + conda install -c rapidsai -c conda-forge -c nvidia "rapidsai-nightly::libcudf=$CUDF_REQUIRED_VERSION" -y + else + conda install -c rapidsai -c conda-forge -c nvidia "rapidsai::libcudf=$CUDF_REQUIRED_VERSION" -y + fi +fi diff --git a/sirius/load.sql b/sirius/load.sql new file mode 100644 index 000000000..24891835f --- /dev/null +++ b/sirius/load.sql @@ -0,0 +1,7 @@ +INSERT INTO hits +SELECT * REPLACE ( + make_date(EventDate) AS EventDate, + epoch_ms(EventTime * 1000) AS EventTime, + epoch_ms(ClientEventTime * 1000) AS ClientEventTime, + epoch_ms(LocalEventTime * 1000) AS LocalEventTime) +FROM read_parquet('hits.parquet', binary_as_string=True); diff --git a/sirius/run.sh b/sirius/run.sh index a06fe4c78..ac5dd8243 100755 --- a/sirius/run.sh +++ b/sirius/run.sh @@ -1,8 +1,9 @@ #!/bin/bash TRIES=3 -GPU_CACHING_SIZE='40 GB' +GPU_CACHING_SIZE='80 GB' GPU_PROCESSING_SIZE='40 GB' +CPU_PROCESSING_SIZE="100 GB" cat queries.sql | while read -r query; do sync @@ -13,7 +14,7 @@ cat queries.sql | while read -r query; do cli_params+=("-c") cli_params+=(".timer on") cli_params+=("-c") - cli_params+=("call gpu_buffer_init(\"${GPU_CACHING_SIZE}\", \"${GPU_PROCESSING_SIZE}\");") + cli_params+=("call gpu_buffer_init(\"${GPU_CACHING_SIZE}\", \"${GPU_PROCESSING_SIZE}\", pinned_memory_size = \"${CPU_PROCESSING_SIZE}\");") for i in $(seq 1 $TRIES); do cli_params+=("-c") cli_params+=("call gpu_processing(\"${query}\");") From 0386b642d5e3b31afeea6b71918dbcf2a88132d1 Mon Sep 17 00:00:00 2001 From: Yifei-yang7 Date: Tue, 28 Oct 2025 16:57:42 +0000 Subject: [PATCH 3/8] Add results on lambda-GH200. 
--- sirius/results/lambda-GH200.json | 57 ++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 sirius/results/lambda-GH200.json diff --git a/sirius/results/lambda-GH200.json b/sirius/results/lambda-GH200.json new file mode 100644 index 000000000..09306a811 --- /dev/null +++ b/sirius/results/lambda-GH200.json @@ -0,0 +1,57 @@ +{ + "system": "Sirius", + "date": "2025-10-22", + "machine": "lambda-GH200", + "cluster_size": 1, + "proprietary": "no", + "tuned": "no", + "tags": ["C++","GPU-accelerated","column-oriented","embedded"], + "load_time": 26.30, + "data_size": 26903326720, + "result": [ + [0.012,0.001,0.000], + [0.113,0.004,0.004], + [0.437,0.005,0.005], + [0.311,0.005,0.004], + [0.325,0.008,0.007], + [1.500,0.013,0.011], + [0.149,0.004,0.004], + [0.243,0.004,0.005], + [0.622,0.085,0.085], + [0.850,0.090,0.089], + [1.104,0.016,0.016], + [1.155,0.017,0.017], + [1.532,0.023,0.023], + [1.829,0.130,0.129], + [1.578,0.025,0.025], + [0.334,0.014,0.013], + [1.774,0.035,0.035], + [1.708,0.025,0.026], + [2.040,0.057,0.057], + [0.284,0.004,0.004], + [4.840,0.042,0.042], + [6.285,0.023,0.023], + [10.631,0.060,0.060], + [44.611,0.065,0.067], + [1.694,0.013,0.013], + [1.408,0.015,0.015], + [1.699,0.034,0.033], + [5.111,0.253,0.252], + [7.632,3.841,3.844], + [0.300,0.036,0.034], + [2.012,0.009,0.010], + [2.154,0.013,0.013], + [0.806,0.072,0.072], + [4.858,0.076,0.076], + [4.854,0.080,0.079], + [0.317,0.021,0.020], + [5.009,0.007,0.007], + [4.168,0.007,0.006], + [5.015,0.006,0.005], + [9.068,0.012,0.011], + [0.942,0.005,0.005], + [0.850,0.005,0.005], + [0.485,0.006,0.005] +] +} + From 3d79c39f49839e2398495eacfe87b819d66a3e7e Mon Sep 17 00:00:00 2001 From: Yifei-yang7 Date: Mon, 3 Nov 2025 17:09:56 +0000 Subject: [PATCH 4/8] Update results --- sirius/results/lambda-GH200.json | 90 ++++++++++++++++---------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/sirius/results/lambda-GH200.json b/sirius/results/lambda-GH200.json index 
09306a811..a70d6e81b 100644 --- a/sirius/results/lambda-GH200.json +++ b/sirius/results/lambda-GH200.json @@ -1,6 +1,6 @@ { "system": "Sirius", - "date": "2025-10-22", + "date": "2025-10-29", "machine": "lambda-GH200", "cluster_size": 1, "proprietary": "no", @@ -9,49 +9,49 @@ "load_time": 26.30, "data_size": 26903326720, "result": [ - [0.012,0.001,0.000], - [0.113,0.004,0.004], - [0.437,0.005,0.005], - [0.311,0.005,0.004], - [0.325,0.008,0.007], - [1.500,0.013,0.011], - [0.149,0.004,0.004], - [0.243,0.004,0.005], - [0.622,0.085,0.085], - [0.850,0.090,0.089], - [1.104,0.016,0.016], - [1.155,0.017,0.017], - [1.532,0.023,0.023], - [1.829,0.130,0.129], - [1.578,0.025,0.025], - [0.334,0.014,0.013], - [1.774,0.035,0.035], - [1.708,0.025,0.026], - [2.040,0.057,0.057], - [0.284,0.004,0.004], - [4.840,0.042,0.042], - [6.285,0.023,0.023], - [10.631,0.060,0.060], - [44.611,0.065,0.067], - [1.694,0.013,0.013], - [1.408,0.015,0.015], - [1.699,0.034,0.033], - [5.111,0.253,0.252], - [7.632,3.841,3.844], - [0.300,0.036,0.034], - [2.012,0.009,0.010], - [2.154,0.013,0.013], - [0.806,0.072,0.072], - [4.858,0.076,0.076], - [4.854,0.080,0.079], - [0.317,0.021,0.020], - [5.009,0.007,0.007], - [4.168,0.007,0.006], - [5.015,0.006,0.005], - [9.068,0.012,0.011], - [0.942,0.005,0.005], - [0.850,0.005,0.005], - [0.485,0.006,0.005] -] + [0.014,0.001,0.001], + [0.377,0.004,0.004], + [0.761,0.005,0.004], + [0.820,0.004,0.004], + [0.524,0.007,0.007], + [2.229,0.010,0.011], + [0.279,0.004,0.004], + [0.264,0.005,0.005], + [1.004,0.085,0.085], + [1.328,0.089,0.089], + [1.663,0.018,0.017], + [1.671,0.018,0.018], + [1.842,0.018,0.018], + [2.390,0.125,0.124], + [2.128,0.020,0.019], + [0.550,0.013,0.013], + [2.337,0.036,0.036], + [2.289,0.026,0.026], + [3.523,0.059,0.059], + [0.493,0.005,0.003], + [10.788,0.041,0.041], + [9.106,0.014,0.014], + [17.759,0.032,0.032], + [72.638,0.066,0.067], + [3.314,0.012,0.015], + [1.784,0.009,0.009], + [2.471,0.028,0.028], + [11.694,0.164,0.164], + [11.721,3.650,3.652], 
+ [0.594,0.036,0.035], + [3.205,0.010,0.009], + [3.983,0.013,0.013], + [1.703,0.072,0.072], + [7.309,0.075,0.074], + [7.268,0.078,0.077], + [0.484,0.020,0.020], + [7.755,0.007,0.006], + [8.632,0.006,0.005], + [7.701,0.005,0.005], + [16.136,0.010,0.010], + [3.365,0.006,0.005], + [2.124,0.005,0.005], + [1.576,0.006,0.005] + ] } From 21c8337e212657f9511378454b2215cee2bcfee2 Mon Sep 17 00:00:00 2001 From: Yifei-yang7 Date: Mon, 10 Nov 2025 21:41:55 +0000 Subject: [PATCH 5/8] Update results. --- sirius/benchmark.sh | 1 + sirius/results/lambda-GH200.json | 86 ++++++++++++++++---------------- 2 files changed, 44 insertions(+), 43 deletions(-) diff --git a/sirius/benchmark.sh b/sirius/benchmark.sh index ff7b53004..1c369f170 100755 --- a/sirius/benchmark.sh +++ b/sirius/benchmark.sh @@ -6,6 +6,7 @@ source dependencies.sh # Build Sirius git clone --recurse-submodules https://github.com/sirius-db/sirius.git cd sirius +git checkout clickbench source setup_sirius.sh make -j$(nproc) export PATH="$PATH:`pwd`/build/release/" diff --git a/sirius/results/lambda-GH200.json b/sirius/results/lambda-GH200.json index a70d6e81b..e7ac8d401 100644 --- a/sirius/results/lambda-GH200.json +++ b/sirius/results/lambda-GH200.json @@ -1,6 +1,6 @@ { "system": "Sirius", - "date": "2025-10-29", + "date": "2025-11-07", "machine": "lambda-GH200", "cluster_size": 1, "proprietary": "no", @@ -10,48 +10,48 @@ "data_size": 26903326720, "result": [ [0.014,0.001,0.001], - [0.377,0.004,0.004], - [0.761,0.005,0.004], - [0.820,0.004,0.004], - [0.524,0.007,0.007], - [2.229,0.010,0.011], - [0.279,0.004,0.004], - [0.264,0.005,0.005], - [1.004,0.085,0.085], - [1.328,0.089,0.089], - [1.663,0.018,0.017], - [1.671,0.018,0.018], - [1.842,0.018,0.018], - [2.390,0.125,0.124], - [2.128,0.020,0.019], - [0.550,0.013,0.013], - [2.337,0.036,0.036], - [2.289,0.026,0.026], - [3.523,0.059,0.059], - [0.493,0.005,0.003], - [10.788,0.041,0.041], - [9.106,0.014,0.014], - [17.759,0.032,0.032], - [72.638,0.066,0.067], - 
[3.314,0.012,0.015], - [1.784,0.009,0.009], - [2.471,0.028,0.028], - [11.694,0.164,0.164], - [11.721,3.650,3.652], - [0.594,0.036,0.035], - [3.205,0.010,0.009], - [3.983,0.013,0.013], - [1.703,0.072,0.072], - [7.309,0.075,0.074], - [7.268,0.078,0.077], - [0.484,0.020,0.020], - [7.755,0.007,0.006], - [8.632,0.006,0.005], - [7.701,0.005,0.005], - [16.136,0.010,0.010], - [3.365,0.006,0.005], - [2.124,0.005,0.005], - [1.576,0.006,0.005] + [0.374,0.004,0.004], + [0.757,0.005,0.004], + [0.793,0.005,0.004], + [0.541,0.008,0.007], + [2.117,0.013,0.010], + [0.277,0.005,0.004], + [0.289,0.005,0.011], + [1.006,0.084,0.085], + [1.334,0.089,0.089], + [1.621,0.017,0.016], + [1.649,0.018,0.017], + [1.835,0.018,0.018], + [2.432,0.124,0.125], + [2.100,0.020,0.019], + [0.557,0.013,0.013], + [2.303,0.036,0.036], + [2.314,0.026,0.025], + [3.444,0.060,0.059], + [0.499,0.004,0.004], + [10.373,0.041,0.041], + [9.085,0.014,0.013], + [17.467,0.032,0.032], + [70.218,0.066,0.067], + [3.397,0.013,0.013], + [1.795,0.009,0.009], + [2.453,0.028,0.027], + [12.015,0.165,0.165], + [9.587,0.268,0.267], + [0.576,0.036,0.035], + [3.161,0.010,0.009], + [3.995,0.013,0.013], + [1.673,0.072,0.072], + [7.463,0.075,0.074], + [7.331,0.077,0.077], + [0.475,0.020,0.020], + [7.886,0.006,0.006], + [8.330,0.006,0.006], + [7.853,0.006,0.005], + [16.314,0.010,0.010], + [3.441,0.006,0.005], + [2.121,0.005,0.005], + [1.605,0.005,0.005] ] } From b6f10c6ea25c383da8335913b65cee08d530e572 Mon Sep 17 00:00:00 2001 From: Yifei-yang7 Date: Sun, 7 Dec 2025 21:38:40 +0000 Subject: [PATCH 6/8] Add results of AWS p5.4xlarge. 
--- sirius/results/p5.4xlarge.json | 57 ++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 sirius/results/p5.4xlarge.json diff --git a/sirius/results/p5.4xlarge.json b/sirius/results/p5.4xlarge.json new file mode 100644 index 000000000..ccb34aade --- /dev/null +++ b/sirius/results/p5.4xlarge.json @@ -0,0 +1,57 @@ +{ + "system": "Sirius", + "date": "2025-12-07", + "machine": "p5.4xlarge", + "cluster_size": 1, + "proprietary": "no", + "tuned": "no", + "tags": ["C++","GPU-accelerated","column-oriented","embedded"], + "load_time": 237.18, + "data_size": 26921938944, + "result": [ + [0.028,0.000,0.001], + [1.468,0.002,0.001], + [3.838,0.003,0.002], + [3.447,0.002,0.002], + [3.028,0.005,0.005], + [6.696,0.008,0.008], + [1.168,0.002,0.002], + [1.840,0.003,0.002], + [5.249,0.092,0.093], + [7.568,0.098,0.097], + [6.409,0.015,0.015], + [6.295,0.015,0.015], + [6.824,0.069,0.068], + [9.907,0.191,0.192], + [8.017,0.075,0.074], + [3.551,0.012,0.011], + [9.769,0.121,0.121], + [9.751,0.024,0.024], + [13.273,0.161,0.160], + [2.906,0.002,0.001], + [41.637,0.040,0.040], + [47.953,0.012,0.012], + [81.308,0.031,0.031], + [224.830,0.065,0.065], + [11.813,0.008,0.008], + [6.547,0.007,0.007], + [12.030,0.128,0.129], + [43.496,0.164,0.163], + [32.878,0.274,0.274], + [2.226,0.038,0.036], + [13.673,0.008,0.007], + [19.490,0.012,0.012], + [13.002,0.081,0.081], + [42.224,0.545,0.547], + [42.345,0.556,0.552], + [2.619,0.019,0.019], + [45.643,0.009,0.009], + [33.959,0.004,0.004], + [45.765,0.003,0.003], + [73.482,0.020,0.020], + [16.912,0.003,0.003], + [14.177,0.003,0.002], + [9.339,0.003,0.002] + ] +} + From aea45f3992b4baa2e7790fb684543775036e1457 Mon Sep 17 00:00:00 2001 From: Yifei-yang7 Date: Wed, 10 Dec 2025 19:39:42 +0000 Subject: [PATCH 7/8] Add instructions to run benchmark script on AWS EC2 instances. 
--- sirius/README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 sirius/README.md diff --git a/sirius/README.md b/sirius/README.md new file mode 100644 index 000000000..acb23ea77 --- /dev/null +++ b/sirius/README.md @@ -0,0 +1,15 @@ +# Sirius + +[Sirius](https://github.com/sirius-db/sirius) is an open-source, GPU-native SQL engine that provides drop-in acceleration for existing databases such as DuckDB. + +## Running the benchmark + +The benchmark script (`benchmark.sh`) has been validated on both Lambda Cloud `GH200` instances and AWS EC2 `p5.4xlarge` instances. + +### AWS EC2 + +To run the benchmark on AWS, launch an EC2 instance using the `Deep Learning Base AMI with Single CUDA (Ubuntu 22.04)` (x86 version), which comes with CUDA preinstalled. + +### Lambda Cloud + +Running the benchmark on Lambda Cloud requires no additional setup. \ No newline at end of file From e426123ebf96c6a5eac81c4ef2d8f9cb1a4373a3 Mon Sep 17 00:00:00 2001 From: Yifei-yang7 Date: Thu, 11 Dec 2025 01:12:21 +0000 Subject: [PATCH 8/8] Add gpu hardware tag and json template.
--- sirius/results/lambda-GH200.json | 3 ++- sirius/results/p5.4xlarge.json | 3 ++- sirius/template.json | 12 ++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 sirius/template.json diff --git a/sirius/results/lambda-GH200.json b/sirius/results/lambda-GH200.json index e7ac8d401..768c623d0 100644 --- a/sirius/results/lambda-GH200.json +++ b/sirius/results/lambda-GH200.json @@ -4,8 +4,9 @@ "machine": "lambda-GH200", "cluster_size": 1, "proprietary": "no", + "hardware": "gpu", "tuned": "no", - "tags": ["C++","GPU-accelerated","column-oriented","embedded"], + "tags": ["C++","column-oriented","embedded","lukewarm-cold-run"], "load_time": 26.30, "data_size": 26903326720, "result": [ diff --git a/sirius/results/p5.4xlarge.json b/sirius/results/p5.4xlarge.json index ccb34aade..0cf36013a 100644 --- a/sirius/results/p5.4xlarge.json +++ b/sirius/results/p5.4xlarge.json @@ -4,8 +4,9 @@ "machine": "p5.4xlarge", "cluster_size": 1, "proprietary": "no", + "hardware": "gpu", "tuned": "no", - "tags": ["C++","GPU-accelerated","column-oriented","embedded"], + "tags": ["C++","column-oriented","embedded","lukewarm-cold-run"], "load_time": 237.18, "data_size": 26921938944, "result": [ diff --git a/sirius/template.json b/sirius/template.json new file mode 100644 index 000000000..512372224 --- /dev/null +++ b/sirius/template.json @@ -0,0 +1,12 @@ +{ + "system": "Sirius", + "proprietary": "no", + "hardware": "gpu", + "tuned": "no", + "tags": [ + "C++", + "column-oriented", + "embedded", + "lukewarm-cold-run" + ] +}