From 9b6609afbb593612bde765fd04f03e51e2db0982 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Mon, 28 Apr 2025 17:23:39 +0800 Subject: [PATCH 01/10] doris update update --- doris/benchmark.sh | 33 +++++++++ doris/count.sh | 13 ++++ doris/create_and_load.sh | 31 ++++++++ doris/ddl_generic.sql | 13 ++++ doris/drop_table.sh | 13 ++++ doris/install.sh | 7 ++ doris/load_data.sh | 75 +++++++++++++++++++ doris/main.sh | 76 ++++++++++++++++++++ doris/queries.sql | 5 ++ doris/results/m6i.8xlarge_bluesky_1000m.json | 20 ++++++ doris/run_queries.sh | 33 +++++++++ doris/start.sh | 11 +++ doris/total_size.sh | 13 ++++ doris/uninstall.sh | 5 ++ 14 files changed, 348 insertions(+) create mode 100755 doris/benchmark.sh create mode 100755 doris/count.sh create mode 100755 doris/create_and_load.sh create mode 100644 doris/ddl_generic.sql create mode 100755 doris/drop_table.sh create mode 100755 doris/install.sh create mode 100755 doris/load_data.sh create mode 100755 doris/main.sh create mode 100644 doris/queries.sql create mode 100644 doris/results/m6i.8xlarge_bluesky_1000m.json create mode 100755 doris/run_queries.sh create mode 100755 doris/start.sh create mode 100755 doris/total_size.sh create mode 100755 doris/uninstall.sh diff --git a/doris/benchmark.sh b/doris/benchmark.sh new file mode 100755 index 0000000..dd8834f --- /dev/null +++ b/doris/benchmark.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 3 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DB_NAME="$1" +RESULT_FILE_RUNTIMES="$2" +RESULT_FILE_MEMORY_USAGE="$3" + +# Construct the query log file name using $DB_NAME +QUERY_LOG_FILE="query_log.txt" + +# Print the database name +echo "Running queries on database: $DB_NAME" + +# Run queries and log the output +./run_queries.sh "$DB_NAME" 2>&1 | tee query_log.txt + +# Process the query log and prepare the result +RESULT=$(cat query_log.txt | grep -oP 'Response time: \d+\.\d+ s' | sed -r -e 's/Response time: 
([0-9]+\.[0-9]+) s/\1/' | \ +awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }') + +# Output the result +if [[ -n "$RESULT_FILE_RUNTIMES" ]]; then + echo "$RESULT" > "$RESULT_FILE_RUNTIMES" + echo "Result written to $RESULT_FILE_RUNTIMES" +else + echo "$RESULT" +fi diff --git a/doris/count.sh b/doris/count.sh new file mode 100755 index 0000000..ff30fca --- /dev/null +++ b/doris/count.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 2 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DB_NAME="$1" +TABLE_NAME="$2" + +mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME -e "SELECT count() FROM $TABLE_NAME;" diff --git a/doris/create_and_load.sh b/doris/create_and_load.sh new file mode 100755 index 0000000..e8d84bd --- /dev/null +++ b/doris/create_and_load.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 7 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DB_NAME="$1" +TABLE_NAME="$2" +DDL_FILE="$3" +DATA_DIRECTORY="$4" +NUM_FILES="$5" +SUCCESS_LOG="$6" +ERROR_LOG="$7" + +# Validate arguments +[[ ! -f "$DDL_FILE" ]] && { echo "Error: DDL file '$DDL_FILE' does not exist."; exit 1; } +[[ ! -d "$DATA_DIRECTORY" ]] && { echo "Error: Data directory '$DATA_DIRECTORY' does not exist."; exit 1; } +[[ ! 
"$NUM_FILES" =~ ^[0-9]+$ ]] && { echo "Error: NUM_FILES must be a positive integer."; exit 1; } + + +echo "Create database" +mysql -P 9030 -h 127.0.0.1 -u root -e "CREATE DATABASE IF NOT EXISTS $DB_NAME" + +echo "Execute DDL" +mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME < "$DDL_FILE" + +echo "Load data" +./load_data.sh "$DATA_DIRECTORY" "$DB_NAME" "$TABLE_NAME" "$NUM_FILES" "$SUCCESS_LOG" "$ERROR_LOG" diff --git a/doris/ddl_generic.sql b/doris/ddl_generic.sql new file mode 100644 index 0000000..f12b6ab --- /dev/null +++ b/doris/ddl_generic.sql @@ -0,0 +1,13 @@ +CREATE TABLE bluesky ( + kind VARCHAR(100) GENERATED ALWAYS AS (get_json_string(data, '$.kind')) NOT NULL, + operation VARCHAR(100) GENERATED ALWAYS AS (get_json_string(data, '$.commit.operation')) NULL, + collection VARCHAR(100) GENERATED ALWAYS AS (get_json_string(data, '$.commit.collection')) NULL, + did VARCHAR(100) GENERATED ALWAYS AS (get_json_string(data,'$.did')) NOT NULL, + time_us BIGINT GENERATED ALWAYS AS (get_json_bigint(data, '$.time_us')) NOT NULL, + `data` variant NOT NULL +) +DUPLICATE KEY (kind, operation, collection) +DISTRIBUTED BY HASH(collection, did) BUCKETS 32 +PROPERTIES ( + "replication_num"="1" +); diff --git a/doris/drop_table.sh b/doris/drop_table.sh new file mode 100755 index 0000000..6e20078 --- /dev/null +++ b/doris/drop_table.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 2 ]]; then + echo "Usage: $0 " + exit 1 +fi + +DB_NAME="$1" +TABLE_NAME="$2" + +echo "Dropping table: $DB_NAME.$TABLE_NAME" +mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME -e "DROP TABLE IF EXISTS $TABLE_NAME" diff --git a/doris/install.sh b/doris/install.sh new file mode 100755 index 0000000..68b70b3 --- /dev/null +++ b/doris/install.sh @@ -0,0 +1,7 @@ +#!/bin/bash +wget --timestamping https://apache-doris-releases.oss-accelerate.aliyuncs.com/${DORIS_PACKAGE}.tar.gz +mkdir ${DORIS_PACKAGE} +tar -xvf ./${DORIS_PACKAGE}.tar.gz --strip-components 1 -C 
./${DORIS_PACKAGE} + +echo "storage_page_cache_limit=60%" >> ./${DORIS_PACKAGE}/be/conf/be.conf +echo "enable_java_support=false" >> ./${DORIS_PACKAGE}/be/conf/be.conf diff --git a/doris/load_data.sh b/doris/load_data.sh new file mode 100755 index 0000000..df28b5b --- /dev/null +++ b/doris/load_data.sh @@ -0,0 +1,75 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 6 ]]; then + echo "Usage: $0 " + exit 1 +fi + + +# Arguments +DATA_DIRECTORY="$1" +DB_NAME="$2" +TABLE_NAME="$3" +MAX_FILES="$4" +SUCCESS_LOG="$5" +ERROR_LOG="$6" + +# Validate arguments +[[ ! -d "$DATA_DIRECTORY" ]] && { echo "Error: Data directory '$DATA_DIRECTORY' does not exist."; exit 1; } +[[ ! "$MAX_FILES" =~ ^[0-9]+$ ]] && { echo "Error: MAX_FILES must be a positive integer."; exit 1; } + +# Create a temporary directory for uncompressed files +TEMP_DIR=$(mktemp -d /var/tmp/json_files.XXXXXX) +trap "rm -rf $TEMP_DIR" EXIT # Cleanup temp directory on script exit + +# Load data +counter=0 +start=0 +for file in $(ls "$DATA_DIRECTORY"/*.json.gz | head -n "$MAX_FILES"); do + echo "Processing file: $file" + num=$(echo "$file" | sed -n 's/[^0-9]*\([0-9]\+\).*/\1/p') + if [ "$num" -le "$start" ]; then + continue + fi + + # Uncompress the file into the TEMP_DIR + uncompressed_file="$TEMP_DIR/$(basename "${file%.gz}")" + gunzip -c "$file" > "$uncompressed_file" + + if [[ $? 
-ne 0 ]]; then + echo "Error: Failed to uncompress $file" >> "$ERROR_LOG" + continue + fi + MAX_ATTEMPT=10 + attempt=0 + while [ $attempt -lt $MAX_ATTEMPT ] + do + # Attempt the import + http_code=$(curl -s -w "%{http_code}" -o >(cat >/tmp/curl_body) --location-trusted -u root: -H "max_filter_ratio: 0.1" -H "Expect:100-continue" -H "columns: data" -T "$uncompressed_file" -XPUT http://127.0.0.1:8030/api/"$DB_NAME"/"$TABLE_NAME"/_stream_load) + response_body="$(cat /tmp/curl_body)" + response_status="$(cat /tmp/curl_body | jq -r '.Status')" + echo $response_status + if [[ "$http_code" -ge 200 && "$http_code" -lt 300 ]]; then + if [ "$response_status" = "Success" ] + then + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Successfully imported $file. Response: $response_body" >> "$SUCCESS_LOG" + rm -f "$uncompressed_file" # Delete the uncompressed file after successful processing + attempt=$((MAX_ATTEMPT)) + else + echo "[$(date '+%Y-%m-%d %H:%M:%S')] $attempt attempt failed for $file with status code $http_code. Response: $response_body" >> "$ERROR_LOG" + attempt=$((attempt + 1)) + sleep 2 + fi + else + echo "[$(date '+%Y-%m-%d %H:%M:%S')] $attempt attempt failed for $file with status code $http_code. 
Response: $response_body" >> "$ERROR_LOG" + attempt=$((attempt + 1)) + sleep 2 + fi + done + + counter=$((counter + 1)) + if [[ $counter -ge $MAX_FILES ]]; then + break + fi +done diff --git a/doris/main.sh b/doris/main.sh new file mode 100755 index 0000000..1a62905 --- /dev/null +++ b/doris/main.sh @@ -0,0 +1,76 @@ +#!/bin/bash +export DORIS_PACKAGE="apache-doris-3.0.5-bin-x64" + +DEFAULT_CHOICE=ask +DEFAULT_DATA_DIRECTORY=~/data/bluesky + +# Allow the user to optionally provide the scale factor ("choice") as an argument +CHOICE="${1:-$DEFAULT_CHOICE}" + +# Allow the user to optionally provide the data directory as an argument +DATA_DIRECTORY="${2:-$DEFAULT_DATA_DIRECTORY}" + +# Define success and error log files +SUCCESS_LOG="${3:-success.log}" +ERROR_LOG="${4:-error.log}" + +# Define prefix for output files +OUTPUT_PREFIX="${5:-_m6i.8xlarge}" + +# Check if the directory exists +if [[ ! -d "$DATA_DIRECTORY" ]]; then + echo "Error: Data directory '$DATA_DIRECTORY' does not exist." + exit 1 +fi + +if [ "$CHOICE" = "ask" ]; then + echo "Select the dataset size to benchmark:" + echo "1) 1m (default)" + echo "2) 10m" + echo "3) 100m" + echo "4) 1000m" + echo "5) all" + read -p "Enter the number corresponding to your choice: " CHOICE +fi; + +./install.sh +./start.sh + +benchmark() { + local size=$1 + local suffix=$2 + # Check DATA_DIRECTORY contains the required number of files to run the benchmark + file_count=$(find "$DATA_DIRECTORY" -type f | wc -l) + if (( file_count < size )); then + echo "Error: Not enough files in '$DATA_DIRECTORY'. Required: $size, Found: $file_count." 
+ exit 1 + fi + ./create_and_load.sh "bluesky_${size}m_${suffix}" bluesky "ddl_${suffix}.sql" "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG" + ./total_size.sh "bluesky_${size}m_${suffix}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.total_size" + ./count.sh "bluesky_${size}m_${suffix}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.count" + ./benchmark.sh "bluesky_${size}m_${suffix}" "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.results_runtime" "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.results_memory_usage" + ./drop_table.sh "bluesky_${size}m_${suffix}" bluesky +} + +case $CHOICE in + 2) + benchmark 10 generic + ;; + 3) + benchmark 100 generic + ;; + 4) + benchmark 1000 generic + ;; + 5) + benchmark 1 generic + benchmark 10 generic + benchmark 100 generic + benchmark 1000 generic + ;; + *) + benchmark 1 generic + ;; +esac + +./uninstall.sh diff --git a/doris/queries.sql b/doris/queries.sql new file mode 100644 index 0000000..9a9e630 --- /dev/null +++ b/doris/queries.sql @@ -0,0 +1,5 @@ +SELECT collection AS event, COUNT(*) AS count FROM bluesky GROUP BY event ORDER BY count DESC; +SELECT collection AS event, COUNT(*) AS count, COUNT(DISTINCT did) AS users FROM bluesky WHERE kind = 'commit' AND operation = 'create' GROUP BY event ORDER BY count DESC; +SELECT collection AS event, HOUR(from_microsecond(time_us)) AS hour_of_day, COUNT(*) AS count FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection IN ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like') GROUP BY event, hour_of_day ORDER BY hour_of_day, event; +SELECT did AS user_id, MIN(from_microsecond(time_us)) AS first_post_ts FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection = 'app.bsky.feed.post' GROUP BY user_id ORDER BY first_post_ts ASC LIMIT 3; +SELECT did AS user_id, MILLISECONDS_DIFF(MAX(from_microsecond(time_us)),MIN(from_microsecond(time_us))) AS activity_span FROM bluesky WHERE kind = 'commit' AND operation 
= 'create' AND collection = 'app.bsky.feed.post' GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; diff --git a/doris/results/m6i.8xlarge_bluesky_1000m.json b/doris/results/m6i.8xlarge_bluesky_1000m.json new file mode 100644 index 0000000..abeff26 --- /dev/null +++ b/doris/results/m6i.8xlarge_bluesky_1000m.json @@ -0,0 +1,20 @@ +{ + "system": "Doris", + "version": "3.0.5", + "os": "Ubuntu 24.04", + "date": "2025-04-30", + "machine": "m6i.8xlarge, 16000gib gp3", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 1000000000, + "num_loaded_documents": 999999245, + "total_size": 215190746431, + "result": [ + [1.69,1.46,1.47], + [90.34,4.07,4.08], + [15.45,4.28,4.27], + [0.84,0.82,0.83], + [0.86,0.89,0.88] + ] +} diff --git a/doris/run_queries.sh b/doris/run_queries.sh new file mode 100755 index 0000000..04d5266 --- /dev/null +++ b/doris/run_queries.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 1 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DB_NAME="$1" + +TRIES=3 + +mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME -e "set global parallel_pipeline_task_num=32;" +mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME -e "set global enable_parallel_scan=false;" + +cat queries.sql | while read -r query; do + + # Clear the Linux file system cache + echo "Clearing file system cache..." + sync + echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + echo "File system cache cleared." 
+ + # Print the query + echo "Running query: $query" + + # Execute the query multiple times + for i in $(seq 1 $TRIES); do + RESP=$(mysql -vvv -h127.1 -P9030 -uroot "$DB_NAME" -e "$query" | perl -nle 'if (/\((?:(\d+) min )?(\d+\.\d+) sec\)/) { $t = ($1 || 0) * 60 + $2; print $t }' ||:) + echo "Response time: ${RESP} s" + done; +done; diff --git a/doris/start.sh b/doris/start.sh new file mode 100755 index 0000000..0c27172 --- /dev/null +++ b/doris/start.sh @@ -0,0 +1,11 @@ +#!/bin/bash +export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64 + +./${DORIS_PACKAGE}/be/bin/start_be.sh --daemon +./${DORIS_PACKAGE}/fe/bin/start_fe.sh --daemon + +sleep 30s + +mysql -P 9030 -h 127.0.0.1 -u root -e "ALTER SYSTEM ADD BACKEND \"127.0.0.1:9050\";" + +sleep 10s diff --git a/doris/total_size.sh b/doris/total_size.sh new file mode 100755 index 0000000..b0700d0 --- /dev/null +++ b/doris/total_size.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 2 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DB_NAME="$1" +TABLE_NAME="$2" + +mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME -e "SHOW DATA FROM $TABLE_NAME" diff --git a/doris/uninstall.sh b/doris/uninstall.sh new file mode 100755 index 0000000..df6fe50 --- /dev/null +++ b/doris/uninstall.sh @@ -0,0 +1,5 @@ +#!/bin/bash +./${DORIS_PACKAGE}/be/bin/stop_be.sh +./${DORIS_PACKAGE}/fe/bin/stop_fe.sh + +rm -rf ./${DORIS_PACKAGE} \ No newline at end of file From 3f48d945ae08aa9641c9be0fbc4a77c03a2d13c5 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Tue, 6 May 2025 16:25:20 +0800 Subject: [PATCH 02/10] update --- doris/benchmark.sh | 6 ++-- doris/ddl_default.sql | 8 +++++ .../{ddl_generic.sql => ddl_materialized.sql} | 0 doris/install.sh | 3 -- doris/main.sh | 29 ++++++++++++------- doris/queries_default.sql | 5 ++++ .../{queries.sql => queries_materialized.sql} | 0 doris/results/m6i.8xlarge_bluesky_1000m.json | 20 ------------- doris/run_queries.sh | 7 +++-- doris/start.sh | 2 -- 
doris/stop.sh | 3 ++ doris/uninstall.sh | 3 -- 12 files changed, 42 insertions(+), 44 deletions(-) create mode 100644 doris/ddl_default.sql rename doris/{ddl_generic.sql => ddl_materialized.sql} (100%) create mode 100644 doris/queries_default.sql rename doris/{queries.sql => queries_materialized.sql} (100%) delete mode 100644 doris/results/m6i.8xlarge_bluesky_1000m.json create mode 100755 doris/stop.sh diff --git a/doris/benchmark.sh b/doris/benchmark.sh index dd8834f..7955f78 100755 --- a/doris/benchmark.sh +++ b/doris/benchmark.sh @@ -2,14 +2,14 @@ # Check if the required arguments are provided if [[ $# -lt 3 ]]; then - echo "Usage: $0 " + echo "Usage: $0 " exit 1 fi # Arguments DB_NAME="$1" RESULT_FILE_RUNTIMES="$2" -RESULT_FILE_MEMORY_USAGE="$3" +QUERIES_FILE="$3" # Construct the query log file name using $DB_NAME QUERY_LOG_FILE="query_log.txt" @@ -18,7 +18,7 @@ QUERY_LOG_FILE="query_log.txt" echo "Running queries on database: $DB_NAME" # Run queries and log the output -./run_queries.sh "$DB_NAME" 2>&1 | tee query_log.txt +./run_queries.sh "$DB_NAME" "$QUERIES_FILE" 2>&1 | tee query_log.txt # Process the query log and prepare the result RESULT=$(cat query_log.txt | grep -oP 'Response time: \d+\.\d+ s' | sed -r -e 's/Response time: ([0-9]+\.[0-9]+) s/\1/' | \ diff --git a/doris/ddl_default.sql b/doris/ddl_default.sql new file mode 100644 index 0000000..4997a90 --- /dev/null +++ b/doris/ddl_default.sql @@ -0,0 +1,8 @@ +CREATE TABLE bluesky ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `data` variant NOT NULL +) +DISTRIBUTED BY HASH(id) BUCKETS 32 +PROPERTIES ( + "replication_num"="1" +); diff --git a/doris/ddl_generic.sql b/doris/ddl_materialized.sql similarity index 100% rename from doris/ddl_generic.sql rename to doris/ddl_materialized.sql diff --git a/doris/install.sh b/doris/install.sh index 68b70b3..c98e0ca 100755 --- a/doris/install.sh +++ b/doris/install.sh @@ -2,6 +2,3 @@ wget --timestamping 
https://apache-doris-releases.oss-accelerate.aliyuncs.com/${DORIS_PACKAGE}.tar.gz mkdir ${DORIS_PACKAGE} tar -xvf ./${DORIS_PACKAGE}.tar.gz --strip-components 1 -C ./${DORIS_PACKAGE} - -echo "storage_page_cache_limit=60%" >> ./${DORIS_PACKAGE}/be/conf/be.conf -echo "enable_java_support=false" >> ./${DORIS_PACKAGE}/be/conf/be.conf diff --git a/doris/main.sh b/doris/main.sh index 1a62905..a85d5f5 100755 --- a/doris/main.sh +++ b/doris/main.sh @@ -48,29 +48,38 @@ benchmark() { ./create_and_load.sh "bluesky_${size}m_${suffix}" bluesky "ddl_${suffix}.sql" "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG" ./total_size.sh "bluesky_${size}m_${suffix}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.total_size" ./count.sh "bluesky_${size}m_${suffix}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.count" - ./benchmark.sh "bluesky_${size}m_${suffix}" "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.results_runtime" "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.results_memory_usage" + ./benchmark.sh "bluesky_${size}m_${suffix}" "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.results_runtime" "queries_${suffix}.sql" ./drop_table.sh "bluesky_${size}m_${suffix}" bluesky } case $CHOICE in 2) - benchmark 10 generic + benchmark 10 default + benchmark 10 materialized ;; 3) - benchmark 100 generic + benchmark 100 default + benchmark 100 materialized ;; 4) - benchmark 1000 generic + benchmark 1000 default + benchmark 1000 materialized ;; 5) - benchmark 1 generic - benchmark 10 generic - benchmark 100 generic - benchmark 1000 generic + benchmark 1 materialized + benchmark 1 default + benchmark 10 materialized + benchmark 10 default + benchmark 100 materialized + benchmark 100 default + benchmark 1000 materialized + benchmark 1000 default ;; *) - benchmark 1 generic + benchmark 1 materialized + benchmark 1 default ;; esac -./uninstall.sh +./stop.sh +#./uninstall.sh diff --git a/doris/queries_default.sql b/doris/queries_default.sql new file mode 100644 index 
0000000..51514f2 --- /dev/null +++ b/doris/queries_default.sql @@ -0,0 +1,5 @@ +SELECT get_json_string(data, 'commit.collection') AS event, COUNT(*) AS count FROM bluesky GROUP BY event ORDER BY count DESC; +SELECT get_json_string(data, 'commit.collection') AS event, COUNT(*) AS count, COUNT(DISTINCT get_json_string(data, 'did')) AS users FROM bluesky WHERE get_json_string(data, 'kind') = 'commit' AND get_json_string(data, 'commit.operation') = 'create' GROUP BY event ORDER BY count DESC; +SELECT get_json_string(data, 'commit.collection') AS event, HOUR(from_microsecond(get_json_int(data, 'time_us'))) AS hour_of_day, COUNT(*) AS count FROM bluesky WHERE get_json_string(data, 'kind') = 'commit' AND get_json_string(data, 'commit.operation') = 'create' AND get_json_string(data, 'commit.collection') IN ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like') GROUP BY event, hour_of_day ORDER BY hour_of_day, event; +SELECT get_json_string(data, 'did') AS user_id, MIN(from_microsecond(get_json_int(data, 'time_us'))) AS first_post_ts FROM bluesky WHERE get_json_string(data, 'kind') = 'commit' AND get_json_string(data, 'commit.operation') = 'create' AND get_json_string(data, 'commit.collection') = 'app.bsky.feed.post' GROUP BY user_id ORDER BY first_post_ts ASC LIMIT 3; +SELECT get_json_string(data, 'did') AS user_id, MILLISECONDS_DIFF(MAX(from_microsecond(get_json_int(data, 'time_us'))),MIN(from_microsecond(get_json_int(data, 'time_us')))) AS activity_span FROM bluesky WHERE get_json_string(data, 'kind') = 'commit' AND get_json_string(data, 'commit.operation') = 'create' AND get_json_string(data, 'commit.collection') = 'app.bsky.feed.post' GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; diff --git a/doris/queries.sql b/doris/queries_materialized.sql similarity index 100% rename from doris/queries.sql rename to doris/queries_materialized.sql diff --git a/doris/results/m6i.8xlarge_bluesky_1000m.json b/doris/results/m6i.8xlarge_bluesky_1000m.json deleted 
file mode 100644 index abeff26..0000000 --- a/doris/results/m6i.8xlarge_bluesky_1000m.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "system": "Doris", - "version": "3.0.5", - "os": "Ubuntu 24.04", - "date": "2025-04-30", - "machine": "m6i.8xlarge, 16000gib gp3", - "retains_structure": "yes", - "tags": [ - ], - "dataset_size": 1000000000, - "num_loaded_documents": 999999245, - "total_size": 215190746431, - "result": [ - [1.69,1.46,1.47], - [90.34,4.07,4.08], - [15.45,4.28,4.27], - [0.84,0.82,0.83], - [0.86,0.89,0.88] - ] -} diff --git a/doris/run_queries.sh b/doris/run_queries.sh index 04d5266..e38a799 100755 --- a/doris/run_queries.sh +++ b/doris/run_queries.sh @@ -1,20 +1,21 @@ #!/bin/bash # Check if the required arguments are provided -if [[ $# -lt 1 ]]; then - echo "Usage: $0 " +if [[ $# -lt 2 ]]; then + echo "Usage: $0 " exit 1 fi # Arguments DB_NAME="$1" +QUERIES_FILE="$2" TRIES=3 mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME -e "set global parallel_pipeline_task_num=32;" mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME -e "set global enable_parallel_scan=false;" -cat queries.sql | while read -r query; do +cat $QUERIES_FILE | while read -r query; do # Clear the Linux file system cache echo "Clearing file system cache..." 
diff --git a/doris/start.sh b/doris/start.sh index 0c27172..dea212f 100755 --- a/doris/start.sh +++ b/doris/start.sh @@ -1,6 +1,4 @@ #!/bin/bash -export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64 - ./${DORIS_PACKAGE}/be/bin/start_be.sh --daemon ./${DORIS_PACKAGE}/fe/bin/start_fe.sh --daemon diff --git a/doris/stop.sh b/doris/stop.sh new file mode 100755 index 0000000..4a22411 --- /dev/null +++ b/doris/stop.sh @@ -0,0 +1,3 @@ +#!/bin/bash +./${DORIS_PACKAGE}/be/bin/stop_be.sh +./${DORIS_PACKAGE}/fe/bin/stop_fe.sh diff --git a/doris/uninstall.sh b/doris/uninstall.sh index df6fe50..8c60eab 100755 --- a/doris/uninstall.sh +++ b/doris/uninstall.sh @@ -1,5 +1,2 @@ #!/bin/bash -./${DORIS_PACKAGE}/be/bin/stop_be.sh -./${DORIS_PACKAGE}/fe/bin/stop_fe.sh - rm -rf ./${DORIS_PACKAGE} \ No newline at end of file From 88270e7fc0cc7af3b80f2dc3fadbb85e9cdf6c2d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 6 May 2025 16:07:33 +0000 Subject: [PATCH 03/10] Minor fixups --- doris/install.sh | 10 +++++++--- doris/main.sh | 5 +++-- doris/start.sh | 7 +++++-- doris/stop.sh | 5 +++-- doris/uninstall.sh | 5 ++++- 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/doris/install.sh b/doris/install.sh index c98e0ca..3b8015a 100755 --- a/doris/install.sh +++ b/doris/install.sh @@ -1,4 +1,8 @@ #!/bin/bash -wget --timestamping https://apache-doris-releases.oss-accelerate.aliyuncs.com/${DORIS_PACKAGE}.tar.gz -mkdir ${DORIS_PACKAGE} -tar -xvf ./${DORIS_PACKAGE}.tar.gz --strip-components 1 -C ./${DORIS_PACKAGE} + +wget https://apache-doris-releases.oss-accelerate.aliyuncs.com/${DORIS_FULL_NAME}.tar.gz +mkdir ${DORIS_FULL_NAME} +tar -xvf ${DORIS_FULL_NAME}.tar.gz --strip-components 1 -C ${DORIS_FULL_NAME} + +sudo apt-get update +sudo apt-get install -y mysql-client openjdk-17-jre-headless # somehow _EXACTLY_ v17 is needed diff --git a/doris/main.sh b/doris/main.sh index a85d5f5..8d6c659 100755 --- a/doris/main.sh +++ b/doris/main.sh @@ -1,5 +1,6 @@ #!/bin/bash -export 
DORIS_PACKAGE="apache-doris-3.0.5-bin-x64" + +export DORIS_FULL_NAME="apache-doris-3.0.5-bin-x64" DEFAULT_CHOICE=ask DEFAULT_DATA_DIRECTORY=~/data/bluesky @@ -82,4 +83,4 @@ case $CHOICE in esac ./stop.sh -#./uninstall.sh +./uninstall.sh diff --git a/doris/start.sh b/doris/start.sh index dea212f..13e9029 100755 --- a/doris/start.sh +++ b/doris/start.sh @@ -1,9 +1,12 @@ #!/bin/bash -./${DORIS_PACKAGE}/be/bin/start_be.sh --daemon -./${DORIS_PACKAGE}/fe/bin/start_fe.sh --daemon +${DORIS_FULL_NAME}/be/bin/start_be.sh --daemon +${DORIS_FULL_NAME}/fe/bin/start_fe.sh --daemon + +echo "Sleep 30 sec" sleep 30s mysql -P 9030 -h 127.0.0.1 -u root -e "ALTER SYSTEM ADD BACKEND \"127.0.0.1:9050\";" +echo "Sleep 10 sec" sleep 10s diff --git a/doris/stop.sh b/doris/stop.sh index 4a22411..513f5ad 100755 --- a/doris/stop.sh +++ b/doris/stop.sh @@ -1,3 +1,4 @@ #!/bin/bash -./${DORIS_PACKAGE}/be/bin/stop_be.sh -./${DORIS_PACKAGE}/fe/bin/stop_fe.sh + +${DORIS_FULL_NAME}/be/bin/stop_be.sh +${DORIS_FULL_NAME}/fe/bin/stop_fe.sh diff --git a/doris/uninstall.sh b/doris/uninstall.sh index 8c60eab..89a5c4b 100755 --- a/doris/uninstall.sh +++ b/doris/uninstall.sh @@ -1,2 +1,5 @@ #!/bin/bash -rm -rf ./${DORIS_PACKAGE} \ No newline at end of file + +sudo apt-get remove -y mysql-client openjdk-17-jre-headless + +rm -rf ${DORIS_FULL_NAME} From 578d9210ef51e583b3ac181e0bba09b4736696af Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Wed, 7 May 2025 23:40:49 +0800 Subject: [PATCH 04/10] update sql and result --- doris/ddl_materialized.sql | 2 +- doris/queries_default.sql | 10 +++++----- doris/queries_materialized.sql | 6 +++--- .../m6i.8xlarge_bluesky_1000m_default.json | 20 +++++++++++++++++++ ...6i.8xlarge_bluesky_1000m_materialized.json | 20 +++++++++++++++++++ doris/run_queries.sh | 2 +- doris/start.sh | 4 ++++ 7 files changed, 54 insertions(+), 10 deletions(-) create mode 100644 doris/results/m6i.8xlarge_bluesky_1000m_default.json create mode 100644 
doris/results/m6i.8xlarge_bluesky_1000m_materialized.json diff --git a/doris/ddl_materialized.sql b/doris/ddl_materialized.sql index f12b6ab..727987c 100644 --- a/doris/ddl_materialized.sql +++ b/doris/ddl_materialized.sql @@ -3,7 +3,7 @@ CREATE TABLE bluesky ( operation VARCHAR(100) GENERATED ALWAYS AS (get_json_string(data, '$.commit.operation')) NULL, collection VARCHAR(100) GENERATED ALWAYS AS (get_json_string(data, '$.commit.collection')) NULL, did VARCHAR(100) GENERATED ALWAYS AS (get_json_string(data,'$.did')) NOT NULL, - time_us BIGINT GENERATED ALWAYS AS (get_json_bigint(data, '$.time_us')) NOT NULL, + time DATETIME GENERATED ALWAYS AS (from_microsecond(get_json_bigint(data, '$.time_us'))) NOT NULL, `data` variant NOT NULL ) DUPLICATE KEY (kind, operation, collection) diff --git a/doris/queries_default.sql b/doris/queries_default.sql index 51514f2..3885734 100644 --- a/doris/queries_default.sql +++ b/doris/queries_default.sql @@ -1,5 +1,5 @@ -SELECT get_json_string(data, 'commit.collection') AS event, COUNT(*) AS count FROM bluesky GROUP BY event ORDER BY count DESC; -SELECT get_json_string(data, 'commit.collection') AS event, COUNT(*) AS count, COUNT(DISTINCT get_json_string(data, 'did')) AS users FROM bluesky WHERE get_json_string(data, 'kind') = 'commit' AND get_json_string(data, 'commit.operation') = 'create' GROUP BY event ORDER BY count DESC; -SELECT get_json_string(data, 'commit.collection') AS event, HOUR(from_microsecond(get_json_int(data, 'time_us'))) AS hour_of_day, COUNT(*) AS count FROM bluesky WHERE get_json_string(data, 'kind') = 'commit' AND get_json_string(data, 'commit.operation') = 'create' AND get_json_string(data, 'commit.collection') IN ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like') GROUP BY event, hour_of_day ORDER BY hour_of_day, event; -SELECT get_json_string(data, 'did') AS user_id, MIN(from_microsecond(get_json_int(data, 'time_us'))) AS first_post_ts FROM bluesky WHERE get_json_string(data, 'kind') = 
'commit' AND get_json_string(data, 'commit.operation') = 'create' AND get_json_string(data, 'commit.collection') = 'app.bsky.feed.post' GROUP BY user_id ORDER BY first_post_ts ASC LIMIT 3; -SELECT get_json_string(data, 'did') AS user_id, MILLISECONDS_DIFF(MAX(from_microsecond(get_json_int(data, 'time_us'))),MIN(from_microsecond(get_json_int(data, 'time_us')))) AS activity_span FROM bluesky WHERE get_json_string(data, 'kind') = 'commit' AND get_json_string(data, 'commit.operation') = 'create' AND get_json_string(data, 'commit.collection') = 'app.bsky.feed.post' GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; +SELECT cast(data['commit']['collection'] AS TEXT ) AS event, COUNT(*) AS count FROM bluesky GROUP BY event ORDER BY count DESC; +SELECT cast(data['commit']['collection'] AS TEXT ) AS event, COUNT(*) AS count, COUNT(DISTINCT cast(data['did'] AS TEXT )) AS users FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' GROUP BY event ORDER BY count DESC; +SELECT cast(data['commit']['collection'] AS TEXT ) AS event, HOUR(from_microsecond(CAST(data['time_us'] AS BIGINT))) AS hour_of_day, COUNT(*) AS count FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' AND cast(data['commit']['collection'] AS TEXT ) IN ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like') GROUP BY event, hour_of_day ORDER BY hour_of_day, event; +SELECT cast(data['did'] AS TEXT ) AS user_id, MIN(from_microsecond(CAST(data['time_us'] AS BIGINT))) AS first_post_ts FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' AND cast(data['commit']['collection'] AS TEXT ) = 'app.bsky.feed.post' GROUP BY user_id ORDER BY first_post_ts ASC LIMIT 3; +SELECT cast(data['did'] AS TEXT ) AS user_id, MILLISECONDS_DIFF(MAX(from_microsecond(CAST(data['time_us'] AS 
BIGINT))),MIN(from_microsecond(CAST(data['time_us'] AS BIGINT)))) AS activity_span FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' AND cast(data['commit']['collection'] AS TEXT ) = 'app.bsky.feed.post' GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; diff --git a/doris/queries_materialized.sql b/doris/queries_materialized.sql index 9a9e630..54cd9bc 100644 --- a/doris/queries_materialized.sql +++ b/doris/queries_materialized.sql @@ -1,5 +1,5 @@ SELECT collection AS event, COUNT(*) AS count FROM bluesky GROUP BY event ORDER BY count DESC; SELECT collection AS event, COUNT(*) AS count, COUNT(DISTINCT did) AS users FROM bluesky WHERE kind = 'commit' AND operation = 'create' GROUP BY event ORDER BY count DESC; -SELECT collection AS event, HOUR(from_microsecond(time_us)) AS hour_of_day, COUNT(*) AS count FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection IN ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like') GROUP BY event, hour_of_day ORDER BY hour_of_day, event; -SELECT did AS user_id, MIN(from_microsecond(time_us)) AS first_post_ts FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection = 'app.bsky.feed.post' GROUP BY user_id ORDER BY first_post_ts ASC LIMIT 3; -SELECT did AS user_id, MILLISECONDS_DIFF(MAX(from_microsecond(time_us)),MIN(from_microsecond(time_us))) AS activity_span FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection = 'app.bsky.feed.post' GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; +SELECT collection AS event, HOUR(time) AS hour_of_day, COUNT(*) AS count FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection IN ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like') GROUP BY event, hour_of_day ORDER BY hour_of_day, event; +SELECT did AS user_id, MIN(time) AS first_post_ts FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection = 
'app.bsky.feed.post' GROUP BY user_id ORDER BY first_post_ts ASC LIMIT 3; +SELECT did AS user_id, MILLISECONDS_DIFF(MAX(time),MIN(time)) AS activity_span FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection = 'app.bsky.feed.post' GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; diff --git a/doris/results/m6i.8xlarge_bluesky_1000m_default.json b/doris/results/m6i.8xlarge_bluesky_1000m_default.json new file mode 100644 index 0000000..e7417fd --- /dev/null +++ b/doris/results/m6i.8xlarge_bluesky_1000m_default.json @@ -0,0 +1,20 @@ +{ + "system": "Apache Doris (default)", + "version": "doris-3.0.5-rc01-e277cfb83f", + "os": "Ubuntu 24.04", + "date": "2025-05-07", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 1000000000, + "num_loaded_documents": 999999994, + "total_size": 214692530225, + "result": [ + [4.83,1.69,1.69], + [221.55,11.01,10.25], + [27.94,7.46,7.44], + [234.42,6.37,5.72], + [235.32,6.14,5.88] + ] +} diff --git a/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json b/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json new file mode 100644 index 0000000..c79b6b7 --- /dev/null +++ b/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json @@ -0,0 +1,20 @@ +{ + "system": "Apache Doris (materialized)", + "version": "doris-3.0.5-rc01-e277cfb83f", + "os": "Ubuntu 24.04", + "date": "2025-05-07", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 1000000000, + "num_loaded_documents": 999999245, + "total_size": 215010357805, + "result": [ + [1.67,1.50,1.50], + [119.07,5.62,5.38], + [14.23,1.55,1.56], + [11.38,0.51,0.50], + [0.54,0.53,0.53] + ] +} diff --git a/doris/run_queries.sh b/doris/run_queries.sh index e38a799..e1459b8 100755 --- a/doris/run_queries.sh +++ b/doris/run_queries.sh @@ -28,7 +28,7 @@ cat $QUERIES_FILE | while read -r query; do # Execute the query multiple times for i in $(seq 1 $TRIES); do - 
RESP=$(mysql -vvv -h127.1 -P9030 -uroot "$DB_NAME" -e "$query" | perl -nle 'if (/\((?:(\d+) min )?(\d+\.\d+) sec\)/) { $t = ($1 || 0) * 60 + $2; print $t }' ||:) + RESP=$(mysql -vvv -h127.1 -P9030 -uroot "$DB_NAME" -e "$query" | perl -nle 'if (/\((?:(\d+) min )?(\d+\.\d+) sec\)/) { $t = ($1 || 0) * 60 + $2; printf "%.2f\n", $t }' ||:) echo "Response time: ${RESP} s" done; done; diff --git a/doris/start.sh b/doris/start.sh index 13e9029..a4af767 100755 --- a/doris/start.sh +++ b/doris/start.sh @@ -1,5 +1,9 @@ #!/bin/bash +export JAVA_HOME="/usr/lib/jvm/java-17-openjdk-amd64" +sysctl -w vm.max_map_count=2000000 +ulimit -n 655350 + ${DORIS_FULL_NAME}/be/bin/start_be.sh --daemon ${DORIS_FULL_NAME}/fe/bin/start_fe.sh --daemon From 2dfefc8f50b89875080395095ca235ad472f896b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 10 May 2025 16:22:50 +0000 Subject: [PATCH 05/10] Minor fixups --- doris/benchmark.sh | 2 ++ doris/count.sh | 2 ++ doris/drop_table.sh | 2 ++ doris/main.sh | 2 ++ doris/run_queries.sh | 2 ++ starrocks/benchmark.sh | 2 ++ starrocks/count.sh | 2 ++ starrocks/create_and_load.sh | 2 ++ starrocks/drop_table.sh | 2 ++ starrocks/main.sh | 2 ++ starrocks/run_queries.sh | 2 ++ starrocks/total_size.sh | 2 ++ 12 files changed, 24 insertions(+) diff --git a/doris/benchmark.sh b/doris/benchmark.sh index 7955f78..d7522a8 100755 --- a/doris/benchmark.sh +++ b/doris/benchmark.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in starrocks/benchmark.sh. + # Check if the required arguments are provided if [[ $# -lt 3 ]]; then echo "Usage: $0 " diff --git a/doris/count.sh b/doris/count.sh index ff30fca..12bca04 100755 --- a/doris/count.sh +++ b/doris/count.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in starrocks/count.sh. 
+ # Check if the required arguments are provided if [[ $# -lt 2 ]]; then echo "Usage: $0 " diff --git a/doris/drop_table.sh b/doris/drop_table.sh index 6e20078..1983438 100755 --- a/doris/drop_table.sh +++ b/doris/drop_table.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in starrocks/drop_table.sh. + # Check if the required arguments are provided if [[ $# -lt 2 ]]; then echo "Usage: $0 " diff --git a/doris/main.sh b/doris/main.sh index 8d6c659..0296187 100755 --- a/doris/main.sh +++ b/doris/main.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in starrocks/main.sh. + export DORIS_FULL_NAME="apache-doris-3.0.5-bin-x64" DEFAULT_CHOICE=ask diff --git a/doris/run_queries.sh b/doris/run_queries.sh index e1459b8..b04c6d0 100755 --- a/doris/run_queries.sh +++ b/doris/run_queries.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in starrocks/run_queries.sh. + # Check if the required arguments are provided if [[ $# -lt 2 ]]; then echo "Usage: $0 " diff --git a/starrocks/benchmark.sh b/starrocks/benchmark.sh index dd8834f..67414a2 100755 --- a/starrocks/benchmark.sh +++ b/starrocks/benchmark.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in doris/benchmark.sh. + # Check if the required arguments are provided if [[ $# -lt 3 ]]; then echo "Usage: $0 " diff --git a/starrocks/count.sh b/starrocks/count.sh index ff30fca..bb27457 100755 --- a/starrocks/count.sh +++ b/starrocks/count.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in doris/count.sh. 
+ # Check if the required arguments are provided if [[ $# -lt 2 ]]; then echo "Usage: $0 " diff --git a/starrocks/create_and_load.sh b/starrocks/create_and_load.sh index e8d84bd..45818bc 100755 --- a/starrocks/create_and_load.sh +++ b/starrocks/create_and_load.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in doris/create_and_load.sh. + # Check if the required arguments are provided if [[ $# -lt 7 ]]; then echo "Usage: $0 " diff --git a/starrocks/drop_table.sh b/starrocks/drop_table.sh index 6e20078..46d041f 100755 --- a/starrocks/drop_table.sh +++ b/starrocks/drop_table.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in doris/drop_table.sh. + # Check if the required arguments are provided if [[ $# -lt 2 ]]; then echo "Usage: $0 " diff --git a/starrocks/main.sh b/starrocks/main.sh index 4c5a1c2..bb485eb 100755 --- a/starrocks/main.sh +++ b/starrocks/main.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in doris/main.sh. + DEFAULT_CHOICE=ask DEFAULT_DATA_DIRECTORY=~/data/bluesky diff --git a/starrocks/run_queries.sh b/starrocks/run_queries.sh index 019abe9..990a1c7 100755 --- a/starrocks/run_queries.sh +++ b/starrocks/run_queries.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in doris/run_queries.sh. + # Check if the required arguments are provided if [[ $# -lt 1 ]]; then echo "Usage: $0 " diff --git a/starrocks/total_size.sh b/starrocks/total_size.sh index b0700d0..6d8fc05 100755 --- a/starrocks/total_size.sh +++ b/starrocks/total_size.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in doris/total_size.sh. 
+ # Check if the required arguments are provided if [[ $# -lt 2 ]]; then echo "Usage: $0 " From 90367ee82d776bd4c5c3d03669057d5236915b8d Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Mon, 12 May 2025 14:29:49 +0800 Subject: [PATCH 06/10] update --- doris/ddl_default_with_index.sql | 9 +++++++++ doris/main.sh | 17 +++++++++++----- doris/queries_default_with_index.sql | 5 +++++ .../m6i.8xlarge_bluesky_1000m_default.json | 20 ------------------- ...6i.8xlarge_bluesky_1000m_materialized.json | 20 ------------------- doris/start.sh | 4 ++-- 6 files changed, 28 insertions(+), 47 deletions(-) create mode 100644 doris/ddl_default_with_index.sql create mode 100644 doris/queries_default_with_index.sql delete mode 100644 doris/results/m6i.8xlarge_bluesky_1000m_default.json delete mode 100644 doris/results/m6i.8xlarge_bluesky_1000m_materialized.json diff --git a/doris/ddl_default_with_index.sql b/doris/ddl_default_with_index.sql new file mode 100644 index 0000000..a554baf --- /dev/null +++ b/doris/ddl_default_with_index.sql @@ -0,0 +1,9 @@ +CREATE TABLE bluesky ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `data` variant NOT NULL, + INDEX idx_var (`data`) USING INVERTED +) +DISTRIBUTED BY HASH(id) BUCKETS 32 +PROPERTIES ( + "replication_num"="1" +); diff --git a/doris/main.sh b/doris/main.sh index 0296187..7341608 100755 --- a/doris/main.sh +++ b/doris/main.sh @@ -58,29 +58,36 @@ benchmark() { case $CHOICE in 2) benchmark 10 default + benchmark 10 default_with_index benchmark 10 materialized ;; 3) benchmark 100 default + benchmark 100 default_with_index benchmark 100 materialized ;; 4) benchmark 1000 default + benchmark 1000 default_with_index benchmark 1000 materialized ;; 5) - benchmark 1 materialized benchmark 1 default - benchmark 10 materialized + benchmark 1 default_with_index + benchmark 1 materialized benchmark 10 default - benchmark 100 materialized + benchmark 10 default_with_index + benchmark 10 materialized benchmark 100 default + benchmark 100 default_with_index 
+ benchmark 100 materialized + benchmark 1000 default_with_index benchmark 1000 materialized - benchmark 1000 default ;; *) - benchmark 1 materialized benchmark 1 default + benchmark 1 default_with_index + benchmark 1 materialized ;; esac diff --git a/doris/queries_default_with_index.sql b/doris/queries_default_with_index.sql new file mode 100644 index 0000000..3885734 --- /dev/null +++ b/doris/queries_default_with_index.sql @@ -0,0 +1,5 @@ +SELECT cast(data['commit']['collection'] AS TEXT ) AS event, COUNT(*) AS count FROM bluesky GROUP BY event ORDER BY count DESC; +SELECT cast(data['commit']['collection'] AS TEXT ) AS event, COUNT(*) AS count, COUNT(DISTINCT cast(data['did'] AS TEXT )) AS users FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' GROUP BY event ORDER BY count DESC; +SELECT cast(data['commit']['collection'] AS TEXT ) AS event, HOUR(from_microsecond(CAST(data['time_us'] AS BIGINT))) AS hour_of_day, COUNT(*) AS count FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' AND cast(data['commit']['collection'] AS TEXT ) IN ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like') GROUP BY event, hour_of_day ORDER BY hour_of_day, event; +SELECT cast(data['did'] AS TEXT ) AS user_id, MIN(from_microsecond(CAST(data['time_us'] AS BIGINT))) AS first_post_ts FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' AND cast(data['commit']['collection'] AS TEXT ) = 'app.bsky.feed.post' GROUP BY user_id ORDER BY first_post_ts ASC LIMIT 3; +SELECT cast(data['did'] AS TEXT ) AS user_id, MILLISECONDS_DIFF(MAX(from_microsecond(CAST(data['time_us'] AS BIGINT))),MIN(from_microsecond(CAST(data['time_us'] AS BIGINT)))) AS activity_span FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' AND 
cast(data['commit']['collection'] AS TEXT ) = 'app.bsky.feed.post' GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; diff --git a/doris/results/m6i.8xlarge_bluesky_1000m_default.json b/doris/results/m6i.8xlarge_bluesky_1000m_default.json deleted file mode 100644 index e7417fd..0000000 --- a/doris/results/m6i.8xlarge_bluesky_1000m_default.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "system": "Apache Doris (default)", - "version": "doris-3.0.5-rc01-e277cfb83f", - "os": "Ubuntu 24.04", - "date": "2025-05-07", - "machine": "m6i.8xlarge, 10000gib gp3", - "retains_structure": "yes", - "tags": [ - ], - "dataset_size": 1000000000, - "num_loaded_documents": 999999994, - "total_size": 214692530225, - "result": [ - [4.83,1.69,1.69], - [221.55,11.01,10.25], - [27.94,7.46,7.44], - [234.42,6.37,5.72], - [235.32,6.14,5.88] - ] -} diff --git a/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json b/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json deleted file mode 100644 index c79b6b7..0000000 --- a/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "system": "Apache Doris (materialized)", - "version": "doris-3.0.5-rc01-e277cfb83f", - "os": "Ubuntu 24.04", - "date": "2025-05-07", - "machine": "m6i.8xlarge, 10000gib gp3", - "retains_structure": "yes", - "tags": [ - ], - "dataset_size": 1000000000, - "num_loaded_documents": 999999245, - "total_size": 215010357805, - "result": [ - [1.67,1.50,1.50], - [119.07,5.62,5.38], - [14.23,1.55,1.56], - [11.38,0.51,0.50], - [0.54,0.53,0.53] - ] -} diff --git a/doris/start.sh b/doris/start.sh index a4af767..2e52479 100755 --- a/doris/start.sh +++ b/doris/start.sh @@ -1,8 +1,8 @@ #!/bin/bash export JAVA_HOME="/usr/lib/jvm/java-17-openjdk-amd64" -sysctl -w vm.max_map_count=2000000 -ulimit -n 655350 +sudo sysctl -w vm.max_map_count=2000000 +sudo ulimit -n 655350 ${DORIS_FULL_NAME}/be/bin/start_be.sh --daemon ${DORIS_FULL_NAME}/fe/bin/start_fe.sh --daemon From 
394e474df587b21dee27654857783dd5cadd648d Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Tue, 13 May 2025 13:10:33 +0800 Subject: [PATCH 07/10] update result and some modify --- doris/create_and_load.sh | 3 + doris/ddl_default_with_index.sql | 9 --- doris/main.sh | 9 +-- doris/queries_default_with_index.sql | 5 -- ...i.8xlarge_bluesky_1m_default.query_results | 71 +++++++++++++++++++ .../m6i.8xlarge_bluesky_1000m_default.json | 20 ++++++ ...6i.8xlarge_bluesky_1000m_materialized.json | 20 ++++++ .../m6i.8xlarge_bluesky_100m_default.json | 20 ++++++ ...m6i.8xlarge_bluesky_100m_materialized.json | 20 ++++++ .../m6i.8xlarge_bluesky_10m_default.json | 20 ++++++ .../m6i.8xlarge_bluesky_10m_materialized.json | 20 ++++++ .../m6i.8xlarge_bluesky_1m_default.json | 20 ++++++ .../m6i.8xlarge_bluesky_1m_materialized.json | 20 ++++++ doris/start.sh | 6 +- doris/total_size.sh | 1 + 15 files changed, 239 insertions(+), 25 deletions(-) delete mode 100644 doris/ddl_default_with_index.sql delete mode 100644 doris/queries_default_with_index.sql create mode 100644 doris/results/_query_results/_m6i.8xlarge_bluesky_1m_default.query_results create mode 100644 doris/results/m6i.8xlarge_bluesky_1000m_default.json create mode 100644 doris/results/m6i.8xlarge_bluesky_1000m_materialized.json create mode 100644 doris/results/m6i.8xlarge_bluesky_100m_default.json create mode 100644 doris/results/m6i.8xlarge_bluesky_100m_materialized.json create mode 100644 doris/results/m6i.8xlarge_bluesky_10m_default.json create mode 100644 doris/results/m6i.8xlarge_bluesky_10m_materialized.json create mode 100644 doris/results/m6i.8xlarge_bluesky_1m_default.json create mode 100644 doris/results/m6i.8xlarge_bluesky_1m_materialized.json diff --git a/doris/create_and_load.sh b/doris/create_and_load.sh index e8d84bd..9f9fcbd 100755 --- a/doris/create_and_load.sh +++ b/doris/create_and_load.sh @@ -29,3 +29,6 @@ mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME < "$DDL_FILE" echo "Load data" ./load_data.sh 
"$DATA_DIRECTORY" "$DB_NAME" "$TABLE_NAME" "$NUM_FILES" "$SUCCESS_LOG" "$ERROR_LOG" + +echo "Sleep 120 sec to collect data size" +sleep 120s diff --git a/doris/ddl_default_with_index.sql b/doris/ddl_default_with_index.sql deleted file mode 100644 index a554baf..0000000 --- a/doris/ddl_default_with_index.sql +++ /dev/null @@ -1,9 +0,0 @@ -CREATE TABLE bluesky ( - `id` BIGINT NOT NULL AUTO_INCREMENT, - `data` variant NOT NULL, - INDEX idx_var (`data`) USING INVERTED -) -DISTRIBUTED BY HASH(id) BUCKETS 32 -PROPERTIES ( - "replication_num"="1" -); diff --git a/doris/main.sh b/doris/main.sh index 7341608..12b5700 100755 --- a/doris/main.sh +++ b/doris/main.sh @@ -58,35 +58,28 @@ benchmark() { case $CHOICE in 2) benchmark 10 default - benchmark 10 default_with_index benchmark 10 materialized ;; 3) benchmark 100 default - benchmark 100 default_with_index benchmark 100 materialized ;; 4) benchmark 1000 default - benchmark 1000 default_with_index benchmark 1000 materialized ;; 5) benchmark 1 default - benchmark 1 default_with_index benchmark 1 materialized benchmark 10 default - benchmark 10 default_with_index benchmark 10 materialized benchmark 100 default - benchmark 100 default_with_index benchmark 100 materialized - benchmark 1000 default_with_index + benchmark 1000 default benchmark 1000 materialized ;; *) benchmark 1 default - benchmark 1 default_with_index benchmark 1 materialized ;; esac diff --git a/doris/queries_default_with_index.sql b/doris/queries_default_with_index.sql deleted file mode 100644 index 3885734..0000000 --- a/doris/queries_default_with_index.sql +++ /dev/null @@ -1,5 +0,0 @@ -SELECT cast(data['commit']['collection'] AS TEXT ) AS event, COUNT(*) AS count FROM bluesky GROUP BY event ORDER BY count DESC; -SELECT cast(data['commit']['collection'] AS TEXT ) AS event, COUNT(*) AS count, COUNT(DISTINCT cast(data['did'] AS TEXT )) AS users FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' 
GROUP BY event ORDER BY count DESC; -SELECT cast(data['commit']['collection'] AS TEXT ) AS event, HOUR(from_microsecond(CAST(data['time_us'] AS BIGINT))) AS hour_of_day, COUNT(*) AS count FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' AND cast(data['commit']['collection'] AS TEXT ) IN ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like') GROUP BY event, hour_of_day ORDER BY hour_of_day, event; -SELECT cast(data['did'] AS TEXT ) AS user_id, MIN(from_microsecond(CAST(data['time_us'] AS BIGINT))) AS first_post_ts FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' AND cast(data['commit']['collection'] AS TEXT ) = 'app.bsky.feed.post' GROUP BY user_id ORDER BY first_post_ts ASC LIMIT 3; -SELECT cast(data['did'] AS TEXT ) AS user_id, MILLISECONDS_DIFF(MAX(from_microsecond(CAST(data['time_us'] AS BIGINT))),MIN(from_microsecond(CAST(data['time_us'] AS BIGINT)))) AS activity_span FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' AND cast(data['commit']['collection'] AS TEXT ) = 'app.bsky.feed.post' GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; diff --git a/doris/results/_query_results/_m6i.8xlarge_bluesky_1m_default.query_results b/doris/results/_query_results/_m6i.8xlarge_bluesky_1m_default.query_results new file mode 100644 index 0000000..1784b55 --- /dev/null +++ b/doris/results/_query_results/_m6i.8xlarge_bluesky_1m_default.query_results @@ -0,0 +1,71 @@ +------------------------------------------------------------------------------------------------------------------------ +Result for query Q1: ++----------------------------+--------+ +| event | count | ++----------------------------+--------+ +| app.bsky.feed.like | 448944 | +| app.bsky.graph.follow | 360374 | +| app.bsky.feed.post | 90816 | +| app.bsky.feed.repost | 58540 | +| app.bsky.graph.block 
| 14040 | +| app.bsky.actor.profile | 11762 | +| app.bsky.graph.listitem | 8103 | +| NULL | 5328 | +| app.bsky.graph.listblock | 895 | +| app.bsky.graph.starterpack | 405 | +| app.bsky.graph.list | 356 | +| app.bsky.feed.threadgate | 255 | +| app.bsky.feed.postgate | 104 | +| app.bsky.feed.generator | 74 | +| app.bsky.labeler.service | 4 | ++----------------------------+--------+ + +------------------------------------------------------------------------------------------------------------------------ +Result for query Q2: ++----------------------------+--------+--------+ +| event | count | users | ++----------------------------+--------+--------+ +| app.bsky.feed.like | 444523 | 117617 | +| app.bsky.graph.follow | 337978 | 63957 | +| app.bsky.feed.post | 86812 | 50464 | +| app.bsky.feed.repost | 56993 | 26581 | +| app.bsky.graph.block | 13838 | 5785 | +| app.bsky.graph.listitem | 7568 | 1078 | +| app.bsky.actor.profile | 5337 | 5337 | +| app.bsky.graph.listblock | 860 | 449 | +| app.bsky.graph.list | 259 | 218 | +| app.bsky.feed.threadgate | 228 | 196 | +| app.bsky.graph.starterpack | 104 | 101 | +| app.bsky.feed.postgate | 101 | 82 | +| app.bsky.feed.generator | 10 | 9 | ++----------------------------+--------+--------+ + +------------------------------------------------------------------------------------------------------------------------ +Result for query Q3: ++----------------------+-------------+--------+ +| event | hour_of_day | count | ++----------------------+-------------+--------+ +| app.bsky.feed.like | 16 | 444523 | +| app.bsky.feed.post | 16 | 86812 | +| app.bsky.feed.repost | 16 | 56993 | ++----------------------+-------------+--------+ + +------------------------------------------------------------------------------------------------------------------------ +Result for query Q4: ++----------------------------------+----------------------------+ +| user_id | first_post_ts | ++----------------------------------+----------------------------+ +| 
did:plc:yj3sjq3blzpynh27cumnp5ks | 2024-11-21 16:25:49.000167 | +| did:plc:l5o3qjrmfztir54cpwlv2eme | 2024-11-21 16:25:49.001905 | +| did:plc:s4bwqchfzm6gjqfeb6mexgbu | 2024-11-21 16:25:49.003907 | ++----------------------------------+----------------------------+ + +------------------------------------------------------------------------------------------------------------------------ +Result for query Q5: ++----------------------------------+---------------+ +| user_id | activity_span | ++----------------------------------+---------------+ +| did:plc:tsyymlun4eqjuw7hqrhmwagd | 813006 | +| did:plc:3ug235sfy2pz7cawmpsftb65 | 811602 | +| did:plc:doxhhgtxqiv47tmcovpbcqai | 811404 | ++----------------------------------+---------------+ diff --git a/doris/results/m6i.8xlarge_bluesky_1000m_default.json b/doris/results/m6i.8xlarge_bluesky_1000m_default.json new file mode 100644 index 0000000..3939419 --- /dev/null +++ b/doris/results/m6i.8xlarge_bluesky_1000m_default.json @@ -0,0 +1,20 @@ +{ + "system": "Apache Doris (default)", + "version": "doris-3.0.5-rc01-e277cfb83f", + "os": "Ubuntu 24.04", + "date": "2025-05-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 1000000000, + "num_loaded_documents": 999999994, + "total_size": 214623810748, + "result": [ + [4.37,1.87,1.87], + [222.24,12.50,11.81], + [27.98,7.76,7.74], + [234.62,6.73,5.87], + [235.71,6.31,5.92] + ] +} diff --git a/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json b/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json new file mode 100644 index 0000000..ed64fe6 --- /dev/null +++ b/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json @@ -0,0 +1,20 @@ +{ + "system": "Apache Doris (materialized)", + "version": "doris-3.0.5-rc01-e277cfb83f", + "os": "Ubuntu 24.04", + "date": "2025-05-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 1000000000, + "num_loaded_documents": 
999999245, + "total_size": 195771051803, + "result": [ + [9.74,1.56,1.56], + [141.69,4.86,4.82], + [35.74,1.86,1.84], + [126.09,1.02,0.94], + [90.97,0.94,0.92] + ] +} diff --git a/doris/results/m6i.8xlarge_bluesky_100m_default.json b/doris/results/m6i.8xlarge_bluesky_100m_default.json new file mode 100644 index 0000000..8d8ed68 --- /dev/null +++ b/doris/results/m6i.8xlarge_bluesky_100m_default.json @@ -0,0 +1,20 @@ +{ + "system": "Apache Doris (default)", + "version": "doris-3.0.5-rc01-e277cfb83f", + "os": "Ubuntu 24.04", + "date": "2025-05-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 100000000, + "num_loaded_documents": 100000000, + "total_size": 21304111530, + "result": [ + [0.23,0.19,0.19], + [20.83,1.01,1.02], + [1.22,0.75,0.77], + [0.57,0.52,0.52], + [0.54,0.54,0.55] + ] +} diff --git a/doris/results/m6i.8xlarge_bluesky_100m_materialized.json b/doris/results/m6i.8xlarge_bluesky_100m_materialized.json new file mode 100644 index 0000000..cf90a24 --- /dev/null +++ b/doris/results/m6i.8xlarge_bluesky_100m_materialized.json @@ -0,0 +1,20 @@ +{ + "system": "Apache Doris (materialized)", + "version": "doris-3.0.5-rc01-e277cfb83f", + "os": "Ubuntu 24.04", + "date": "2025-05-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 100000000, + "num_loaded_documents": 99999968, + "total_size": 21247203213, + "result": [ + [0.20,0.18,0.18], + [11.60,0.54,0.54], + [1.49,0.17,0.17], + [0.09,0.09,0.10], + [0.10,0.10,0.09] + ] +} diff --git a/doris/results/m6i.8xlarge_bluesky_10m_default.json b/doris/results/m6i.8xlarge_bluesky_10m_default.json new file mode 100644 index 0000000..24c5261 --- /dev/null +++ b/doris/results/m6i.8xlarge_bluesky_10m_default.json @@ -0,0 +1,20 @@ +{ + "system": "Apache Doris (default)", + "version": "doris-3.0.5-rc01-e277cfb83f", + "os": "Ubuntu 24.04", + "date": "2025-05-13", + "machine": "m6i.8xlarge, 10000gib gp3", + 
"retains_structure": "yes", + "tags": [ + ], + "dataset_size": 10000000, + "num_loaded_documents": 10000000, + "total_size": 2170032226, + "result": [ + [0.05,0.04,0.04], + [1.25,0.15,0.15], + [0.10,0.11,0.11], + [0.08,0.08,0.08], + [0.08,0.09,0.08] + ] +} diff --git a/doris/results/m6i.8xlarge_bluesky_10m_materialized.json b/doris/results/m6i.8xlarge_bluesky_10m_materialized.json new file mode 100644 index 0000000..090d8f4 --- /dev/null +++ b/doris/results/m6i.8xlarge_bluesky_10m_materialized.json @@ -0,0 +1,20 @@ +{ + "system": "Apache Doris (materialized)", + "version": "doris-3.0.5-rc01-e277cfb83f", + "os": "Ubuntu 24.04", + "date": "2025-05-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 10000000, + "num_loaded_documents": 9999994, + "total_size": 2069100494, + "result": [ + [0.05,0.04,0.04], + [0.14,0.10,0.10], + [0.05,0.04,0.04], + [0.02,0.03,0.03], + [0.03,0.03,0.03] + ] +} diff --git a/doris/results/m6i.8xlarge_bluesky_1m_default.json b/doris/results/m6i.8xlarge_bluesky_1m_default.json new file mode 100644 index 0000000..4beec78 --- /dev/null +++ b/doris/results/m6i.8xlarge_bluesky_1m_default.json @@ -0,0 +1,20 @@ +{ + "system": "Apache Doris (default)", + "version": "doris-3.0.5-rc01-e277cfb83f", + "os": "Ubuntu 24.04", + "date": "2025-05-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 1000000, + "num_loaded_documents": 1000000, + "total_size": 207785820, + "result": [ + [0.06,0.04,0.03], + [0.07,0.05,0.05], + [0.05,0.04,0.04], + [0.05,0.05,0.03], + [0.04,0.03,0.04] + ] +} diff --git a/doris/results/m6i.8xlarge_bluesky_1m_materialized.json b/doris/results/m6i.8xlarge_bluesky_1m_materialized.json new file mode 100644 index 0000000..40b2a53 --- /dev/null +++ b/doris/results/m6i.8xlarge_bluesky_1m_materialized.json @@ -0,0 +1,20 @@ +{ + "system": "Apache Doris (materialized)", + "version": "doris-3.0.5-rc01-e277cfb83f", + "os": 
"Ubuntu 24.04", + "date": "2025-05-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 1000000, + "num_loaded_documents": 1000000, + "total_size": 198731366, + "result": [ + [0.02,0.02,0.02], + [0.04,0.04,0.03], + [0.03,0.02,0.02], + [0.02,0.02,0.02], + [0.02,0.02,0.03] + ] +} diff --git a/doris/start.sh b/doris/start.sh index 2e52479..e4fb3cd 100755 --- a/doris/start.sh +++ b/doris/start.sh @@ -2,15 +2,15 @@ export JAVA_HOME="/usr/lib/jvm/java-17-openjdk-amd64" sudo sysctl -w vm.max_map_count=2000000 -sudo ulimit -n 655350 +sudo prlimit --pid $$ --nofile=655350:655350 ${DORIS_FULL_NAME}/be/bin/start_be.sh --daemon ${DORIS_FULL_NAME}/fe/bin/start_fe.sh --daemon -echo "Sleep 30 sec" +echo "Sleep 30 sec to wait doris start" sleep 30s mysql -P 9030 -h 127.0.0.1 -u root -e "ALTER SYSTEM ADD BACKEND \"127.0.0.1:9050\";" -echo "Sleep 10 sec" +echo "Sleep 10 sec to wait frontend are connected to backend" sleep 10s diff --git a/doris/total_size.sh b/doris/total_size.sh index b0700d0..f7b5a85 100755 --- a/doris/total_size.sh +++ b/doris/total_size.sh @@ -10,4 +10,5 @@ fi DB_NAME="$1" TABLE_NAME="$2" +mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME -e "ANALYZE TABLE $TABLE_NAME WITH SYNC" mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME -e "SHOW DATA FROM $TABLE_NAME" From a6d2309ecfe14bb0828a79975e6cc40d548526c4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 20 May 2025 19:48:45 +0000 Subject: [PATCH 08/10] Refresh results with local measurements --- .../results/m6i.8xlarge_bluesky_1000m_default.json | 11 ++++++----- .../m6i.8xlarge_bluesky_1000m_materialized.json | 13 +++++++------ doris/results/m6i.8xlarge_bluesky_100m_default.json | 11 ++++++----- .../m6i.8xlarge_bluesky_100m_materialized.json | 13 +++++++------ doris/results/m6i.8xlarge_bluesky_10m_default.json | 11 ++++++----- .../m6i.8xlarge_bluesky_10m_materialized.json | 13 +++++++------ doris/results/m6i.8xlarge_bluesky_1m_default.json | 11 ++++++----- 
.../m6i.8xlarge_bluesky_1m_materialized.json | 13 +++++++------ 8 files changed, 52 insertions(+), 44 deletions(-) diff --git a/doris/results/m6i.8xlarge_bluesky_1000m_default.json b/doris/results/m6i.8xlarge_bluesky_1000m_default.json index 3939419..fae0d18 100644 --- a/doris/results/m6i.8xlarge_bluesky_1000m_default.json +++ b/doris/results/m6i.8xlarge_bluesky_1000m_default.json @@ -10,11 +10,12 @@ "dataset_size": 1000000000, "num_loaded_documents": 999999994, "total_size": 214623810748, + "data_size": 214623810748, "result": [ - [4.37,1.87,1.87], - [222.24,12.50,11.81], - [27.98,7.76,7.74], - [234.62,6.73,5.87], - [235.71,6.31,5.92] + [4.24,1.67,1.66], + [222.33,11.01,10.16], + [27.82,7.47,7.45], + [235.20,6.41,6.07], + [234.71,6.37,5.78] ] } diff --git a/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json b/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json index ed64fe6..13d6781 100644 --- a/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json +++ b/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json @@ -4,17 +4,18 @@ "os": "Ubuntu 24.04", "date": "2025-05-13", "machine": "m6i.8xlarge, 10000gib gp3", - "retains_structure": "yes", + "retains_structure": "no", "tags": [ ], "dataset_size": 1000000000, "num_loaded_documents": 999999245, "total_size": 195771051803, + "data_size": 195771051803, "result": [ - [9.74,1.56,1.56], - [141.69,4.86,4.82], - [35.74,1.86,1.84], - [126.09,1.02,0.94], - [90.97,0.94,0.92] + [1.65,1.49,1.48], + [119.37,5.51,5.35], + [14.19,1.55,1.54], + [12.30,0.50,0.50], + [0.54,0.53,0.52] ] } diff --git a/doris/results/m6i.8xlarge_bluesky_100m_default.json b/doris/results/m6i.8xlarge_bluesky_100m_default.json index 8d8ed68..01499e3 100644 --- a/doris/results/m6i.8xlarge_bluesky_100m_default.json +++ b/doris/results/m6i.8xlarge_bluesky_100m_default.json @@ -10,11 +10,12 @@ "dataset_size": 100000000, "num_loaded_documents": 100000000, "total_size": 21304111530, + "data_size": 21304111530, "result": [ - [0.23,0.19,0.19], - 
[20.83,1.01,1.02], - [1.22,0.75,0.77], - [0.57,0.52,0.52], - [0.54,0.54,0.55] + [0.23,0.19,0.19], + [20.94,1.01,1.04], + [1.21,0.74,0.75], + [0.54,0.51,0.55], + [0.57,0.56,0.53] ] } diff --git a/doris/results/m6i.8xlarge_bluesky_100m_materialized.json b/doris/results/m6i.8xlarge_bluesky_100m_materialized.json index cf90a24..ac8b038 100644 --- a/doris/results/m6i.8xlarge_bluesky_100m_materialized.json +++ b/doris/results/m6i.8xlarge_bluesky_100m_materialized.json @@ -4,17 +4,18 @@ "os": "Ubuntu 24.04", "date": "2025-05-13", "machine": "m6i.8xlarge, 10000gib gp3", - "retains_structure": "yes", + "retains_structure": "no", "tags": [ ], "dataset_size": 100000000, "num_loaded_documents": 99999968, "total_size": 21247203213, + "data_size": 21247203213, "result": [ - [0.20,0.18,0.18], - [11.60,0.54,0.54], - [1.49,0.17,0.17], - [0.09,0.09,0.10], - [0.10,0.10,0.09] + [0.18,0.17,0.16], + [10.87,0.53,0.53], + [0.56,0.17,0.17], + [0.09,0.08,0.09], + [0.09,0.09,0.09] ] } diff --git a/doris/results/m6i.8xlarge_bluesky_10m_default.json b/doris/results/m6i.8xlarge_bluesky_10m_default.json index 24c5261..728f5ef 100644 --- a/doris/results/m6i.8xlarge_bluesky_10m_default.json +++ b/doris/results/m6i.8xlarge_bluesky_10m_default.json @@ -10,11 +10,12 @@ "dataset_size": 10000000, "num_loaded_documents": 10000000, "total_size": 2170032226, + "data_size": 2170032226, "result": [ - [0.05,0.04,0.04], - [1.25,0.15,0.15], - [0.10,0.11,0.11], - [0.08,0.08,0.08], - [0.08,0.09,0.08] + [1.05,0.04,0.05], + [1.33,0.15,0.15], + [0.11,0.11,0.11], + [0.08,0.08,0.08], + [0.09,0.08,0.08] ] } diff --git a/doris/results/m6i.8xlarge_bluesky_10m_materialized.json b/doris/results/m6i.8xlarge_bluesky_10m_materialized.json index 090d8f4..9b55ccf 100644 --- a/doris/results/m6i.8xlarge_bluesky_10m_materialized.json +++ b/doris/results/m6i.8xlarge_bluesky_10m_materialized.json @@ -4,17 +4,18 @@ "os": "Ubuntu 24.04", "date": "2025-05-13", "machine": "m6i.8xlarge, 10000gib gp3", - "retains_structure": "yes", + 
"retains_structure": "no", "tags": [ ], "dataset_size": 10000000, "num_loaded_documents": 9999994, "total_size": 2069100494, + "data_size": 2069100494, "result": [ - [0.05,0.04,0.04], - [0.14,0.10,0.10], - [0.05,0.04,0.04], - [0.02,0.03,0.03], - [0.03,0.03,0.03] + [0.04,0.04,0.03], + [0.13,0.10,0.09], + [0.04,0.04,0.04], + [0.03,0.03,0.02], + [0.03,0.03,0.04] ] } diff --git a/doris/results/m6i.8xlarge_bluesky_1m_default.json b/doris/results/m6i.8xlarge_bluesky_1m_default.json index 4beec78..a790400 100644 --- a/doris/results/m6i.8xlarge_bluesky_1m_default.json +++ b/doris/results/m6i.8xlarge_bluesky_1m_default.json @@ -10,11 +10,12 @@ "dataset_size": 1000000, "num_loaded_documents": 1000000, "total_size": 207785820, + "data_size": 207785820, "result": [ - [0.06,0.04,0.03], - [0.07,0.05,0.05], - [0.05,0.04,0.04], - [0.05,0.05,0.03], - [0.04,0.03,0.04] + [0.05,0.02,0.03], + [0.09,0.04,0.05], + [0.06,0.04,0.04], + [0.04,0.03,0.05], + [0.03,0.04,0.03] ] } diff --git a/doris/results/m6i.8xlarge_bluesky_1m_materialized.json b/doris/results/m6i.8xlarge_bluesky_1m_materialized.json index 40b2a53..be20c17 100644 --- a/doris/results/m6i.8xlarge_bluesky_1m_materialized.json +++ b/doris/results/m6i.8xlarge_bluesky_1m_materialized.json @@ -4,17 +4,18 @@ "os": "Ubuntu 24.04", "date": "2025-05-13", "machine": "m6i.8xlarge, 10000gib gp3", - "retains_structure": "yes", + "retains_structure": "no", "tags": [ ], "dataset_size": 1000000, "num_loaded_documents": 1000000, "total_size": 198731366, + "data_size": 198731366, "result": [ - [0.02,0.02,0.02], - [0.04,0.04,0.03], - [0.03,0.02,0.02], - [0.02,0.02,0.02], - [0.02,0.02,0.03] + [0.03,0.02,0.02], + [0.05,0.04,0.04], + [0.03,0.02,0.03], + [0.02,0.03,0.02], + [0.02,0.02,0.02] ] } From 02e8736b1ec325383a71069cc8721cb1ff1fc6d1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 20 May 2025 19:49:32 +0000 Subject: [PATCH 09/10] Update readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 
1328379..9f1fbe8 100644 --- a/README.md +++ b/README.md @@ -138,6 +138,7 @@ While the main benchmark uses a specific machine configuration for reproducibili - [x] SingleStore - [x] GreptimeDB - [x] FerretDB +- [x] Apache Doris - [ ] Quickwit - [ ] Meilisearch - [ ] Sneller From 248810c14923508bedaec12928838715bdeb9640 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 May 2025 15:39:37 +0000 Subject: [PATCH 10/10] Remove materialized result --- doris/create_and_load.sh | 16 ++++----- doris/{ddl_default.sql => ddl.sql} | 0 doris/ddl_materialized.sql | 13 ------- doris/main.sh | 35 +++++++------------ ... => _m6i.8xlarge_bluesky_1m.query_results} | 0 ...lt.json => m6i.8xlarge_bluesky_1000m.json} | 2 +- ...6i.8xlarge_bluesky_1000m_materialized.json | 21 ----------- ...ult.json => m6i.8xlarge_bluesky_100m.json} | 2 +- ...m6i.8xlarge_bluesky_100m_materialized.json | 21 ----------- ...ault.json => m6i.8xlarge_bluesky_10m.json} | 2 +- .../m6i.8xlarge_bluesky_10m_materialized.json | 21 ----------- ...fault.json => m6i.8xlarge_bluesky_1m.json} | 2 +- .../m6i.8xlarge_bluesky_1m_materialized.json | 21 ----------- 13 files changed, 24 insertions(+), 132 deletions(-) rename doris/{ddl_default.sql => ddl.sql} (100%) delete mode 100644 doris/ddl_materialized.sql rename doris/results/_query_results/{_m6i.8xlarge_bluesky_1m_default.query_results => _m6i.8xlarge_bluesky_1m.query_results} (100%) rename doris/results/{m6i.8xlarge_bluesky_1000m_default.json => m6i.8xlarge_bluesky_1000m.json} (92%) delete mode 100644 doris/results/m6i.8xlarge_bluesky_1000m_materialized.json rename doris/results/{m6i.8xlarge_bluesky_100m_default.json => m6i.8xlarge_bluesky_100m.json} (92%) delete mode 100644 doris/results/m6i.8xlarge_bluesky_100m_materialized.json rename doris/results/{m6i.8xlarge_bluesky_10m_default.json => m6i.8xlarge_bluesky_10m.json} (92%) delete mode 100644 doris/results/m6i.8xlarge_bluesky_10m_materialized.json rename doris/results/{m6i.8xlarge_bluesky_1m_default.json => 
m6i.8xlarge_bluesky_1m.json} (92%) delete mode 100644 doris/results/m6i.8xlarge_bluesky_1m_materialized.json diff --git a/doris/create_and_load.sh b/doris/create_and_load.sh index 9f9fcbd..a068b8e 100755 --- a/doris/create_and_load.sh +++ b/doris/create_and_load.sh @@ -1,22 +1,20 @@ #!/bin/bash # Check if the required arguments are provided -if [[ $# -lt 7 ]]; then - echo "Usage: $0 " +if [[ $# -lt 6 ]]; then + echo "Usage: $0 <DB_NAME> <TABLE_NAME> <DATA_DIRECTORY> <NUM_FILES> <SUCCESS_LOG> <ERROR_LOG>" exit 1 fi # Arguments DB_NAME="$1" TABLE_NAME="$2" -DDL_FILE="$3" -DATA_DIRECTORY="$4" -NUM_FILES="$5" -SUCCESS_LOG="$6" -ERROR_LOG="$7" +DATA_DIRECTORY="$3" +NUM_FILES="$4" +SUCCESS_LOG="$5" +ERROR_LOG="$6" # Validate arguments -[[ ! -f "$DDL_FILE" ]] && { echo "Error: DDL file '$DDL_FILE' does not exist."; exit 1; } [[ ! -d "$DATA_DIRECTORY" ]] && { echo "Error: Data directory '$DATA_DIRECTORY' does not exist."; exit 1; } [[ ! "$NUM_FILES" =~ ^[0-9]+$ ]] && { echo "Error: NUM_FILES must be a positive integer."; exit 1; } @@ -25,7 +23,7 @@ echo "Create database" mysql -P 9030 -h 127.0.0.1 -u root -e "CREATE DATABASE IF NOT EXISTS $DB_NAME" echo "Execute DDL" -mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME < "$DDL_FILE" +mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME < "ddl.sql" echo "Load data" ./load_data.sh "$DATA_DIRECTORY" "$DB_NAME" "$TABLE_NAME" "$NUM_FILES" "$SUCCESS_LOG" "$ERROR_LOG" diff --git a/doris/ddl_default.sql b/doris/ddl.sql similarity index 100% rename from doris/ddl_default.sql rename to doris/ddl.sql diff --git a/doris/ddl_materialized.sql b/doris/ddl_materialized.sql deleted file mode 100644 index 727987c..0000000 --- a/doris/ddl_materialized.sql +++ /dev/null @@ -1,13 +0,0 @@ -CREATE TABLE bluesky ( - kind VARCHAR(100) GENERATED ALWAYS AS (get_json_string(data, '$.kind')) NOT NULL, - operation VARCHAR(100) GENERATED ALWAYS AS (get_json_string(data, '$.commit.operation')) NULL, - collection VARCHAR(100) GENERATED ALWAYS AS (get_json_string(data, '$.commit.collection')) NULL, - did VARCHAR(100) GENERATED ALWAYS AS
(get_json_string(data,'$.did')) NOT NULL, - time DATETIME GENERATED ALWAYS AS (from_microsecond(get_json_bigint(data, '$.time_us'))) NOT NULL, - `data` variant NOT NULL -) -DUPLICATE KEY (kind, operation, collection) -DISTRIBUTED BY HASH(collection, did) BUCKETS 32 -PROPERTIES ( - "replication_num"="1" -); diff --git a/doris/main.sh b/doris/main.sh index 12b5700..59082e9 100755 --- a/doris/main.sh +++ b/doris/main.sh @@ -41,46 +41,37 @@ fi; benchmark() { local size=$1 - local suffix=$2 # Check DATA_DIRECTORY contains the required number of files to run the benchmark file_count=$(find "$DATA_DIRECTORY" -type f | wc -l) if (( file_count < size )); then echo "Error: Not enough files in '$DATA_DIRECTORY'. Required: $size, Found: $file_count." exit 1 fi - ./create_and_load.sh "bluesky_${size}m_${suffix}" bluesky "ddl_${suffix}.sql" "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG" - ./total_size.sh "bluesky_${size}m_${suffix}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.total_size" - ./count.sh "bluesky_${size}m_${suffix}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.count" - ./benchmark.sh "bluesky_${size}m_${suffix}" "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.results_runtime" "queries_${suffix}.sql" - ./drop_table.sh "bluesky_${size}m_${suffix}" bluesky + ./create_and_load.sh "bluesky_${size}m" bluesky "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG" + ./total_size.sh "bluesky_${size}m" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m.total_size" + ./count.sh "bluesky_${size}m" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m.count" + ./benchmark.sh "bluesky_${size}m" "${OUTPUT_PREFIX}_bluesky_${size}m.results_runtime" "queries.sql" + ./drop_table.sh "bluesky_${size}m" bluesky } case $CHOICE in 2) - benchmark 10 default - benchmark 10 materialized + benchmark 10 ;; 3) - benchmark 100 default - benchmark 100 materialized + benchmark 100 ;; 4) - benchmark 1000 default - benchmark 1000 materialized + benchmark 1000 ;; 5) - 
benchmark 1 default - benchmark 1 materialized - benchmark 10 default - benchmark 10 materialized - benchmark 100 default - benchmark 100 materialized - benchmark 1000 default - benchmark 1000 materialized + benchmark 1 + benchmark 10 + benchmark 100 + benchmark 1000 ;; *) - benchmark 1 default - benchmark 1 materialized + benchmark 1 ;; esac diff --git a/doris/results/_query_results/_m6i.8xlarge_bluesky_1m_default.query_results b/doris/results/_query_results/_m6i.8xlarge_bluesky_1m.query_results similarity index 100% rename from doris/results/_query_results/_m6i.8xlarge_bluesky_1m_default.query_results rename to doris/results/_query_results/_m6i.8xlarge_bluesky_1m.query_results diff --git a/doris/results/m6i.8xlarge_bluesky_1000m_default.json b/doris/results/m6i.8xlarge_bluesky_1000m.json similarity index 92% rename from doris/results/m6i.8xlarge_bluesky_1000m_default.json rename to doris/results/m6i.8xlarge_bluesky_1000m.json index fae0d18..e076f47 100644 --- a/doris/results/m6i.8xlarge_bluesky_1000m_default.json +++ b/doris/results/m6i.8xlarge_bluesky_1000m.json @@ -1,5 +1,5 @@ { - "system": "Apache Doris (default)", + "system": "Apache Doris", "version": "doris-3.0.5-rc01-e277cfb83f", "os": "Ubuntu 24.04", "date": "2025-05-13", diff --git a/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json b/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json deleted file mode 100644 index 13d6781..0000000 --- a/doris/results/m6i.8xlarge_bluesky_1000m_materialized.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "system": "Apache Doris (materialized)", - "version": "doris-3.0.5-rc01-e277cfb83f", - "os": "Ubuntu 24.04", - "date": "2025-05-13", - "machine": "m6i.8xlarge, 10000gib gp3", - "retains_structure": "no", - "tags": [ - ], - "dataset_size": 1000000000, - "num_loaded_documents": 999999245, - "total_size": 195771051803, - "data_size": 195771051803, - "result": [ - [1.65,1.49,1.48], - [119.37,5.51,5.35], - [14.19,1.55,1.54], - [12.30,0.50,0.50], - [0.54,0.53,0.52] - ] 
-} diff --git a/doris/results/m6i.8xlarge_bluesky_100m_default.json b/doris/results/m6i.8xlarge_bluesky_100m.json similarity index 92% rename from doris/results/m6i.8xlarge_bluesky_100m_default.json rename to doris/results/m6i.8xlarge_bluesky_100m.json index 01499e3..f6c3d06 100644 --- a/doris/results/m6i.8xlarge_bluesky_100m_default.json +++ b/doris/results/m6i.8xlarge_bluesky_100m.json @@ -1,5 +1,5 @@ { - "system": "Apache Doris (default)", + "system": "Apache Doris", "version": "doris-3.0.5-rc01-e277cfb83f", "os": "Ubuntu 24.04", "date": "2025-05-13", diff --git a/doris/results/m6i.8xlarge_bluesky_100m_materialized.json b/doris/results/m6i.8xlarge_bluesky_100m_materialized.json deleted file mode 100644 index ac8b038..0000000 --- a/doris/results/m6i.8xlarge_bluesky_100m_materialized.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "system": "Apache Doris (materialized)", - "version": "doris-3.0.5-rc01-e277cfb83f", - "os": "Ubuntu 24.04", - "date": "2025-05-13", - "machine": "m6i.8xlarge, 10000gib gp3", - "retains_structure": "no", - "tags": [ - ], - "dataset_size": 100000000, - "num_loaded_documents": 99999968, - "total_size": 21247203213, - "data_size": 21247203213, - "result": [ - [0.18,0.17,0.16], - [10.87,0.53,0.53], - [0.56,0.17,0.17], - [0.09,0.08,0.09], - [0.09,0.09,0.09], - ] -} diff --git a/doris/results/m6i.8xlarge_bluesky_10m_default.json b/doris/results/m6i.8xlarge_bluesky_10m.json similarity index 92% rename from doris/results/m6i.8xlarge_bluesky_10m_default.json rename to doris/results/m6i.8xlarge_bluesky_10m.json index 728f5ef..f6523ed 100644 --- a/doris/results/m6i.8xlarge_bluesky_10m_default.json +++ b/doris/results/m6i.8xlarge_bluesky_10m.json @@ -1,5 +1,5 @@ { - "system": "Apache Doris (default)", + "system": "Apache Doris", "version": "doris-3.0.5-rc01-e277cfb83f", "os": "Ubuntu 24.04", "date": "2025-05-13", diff --git a/doris/results/m6i.8xlarge_bluesky_10m_materialized.json b/doris/results/m6i.8xlarge_bluesky_10m_materialized.json deleted file mode 
100644 index 9b55ccf..0000000 --- a/doris/results/m6i.8xlarge_bluesky_10m_materialized.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "system": "Apache Doris (materialized)", - "version": "doris-3.0.5-rc01-e277cfb83f", - "os": "Ubuntu 24.04", - "date": "2025-05-13", - "machine": "m6i.8xlarge, 10000gib gp3", - "retains_structure": "no", - "tags": [ - ], - "dataset_size": 10000000, - "num_loaded_documents": 9999994, - "total_size": 2069100494, - "data_size": 2069100494, - "result": [ - [0.04,0.04,0.03], - [0.13,0.10,0.09], - [0.04,0.04,0.04], - [0.03,0.03,0.02], - [0.03,0.03,0.04] - ] -} diff --git a/doris/results/m6i.8xlarge_bluesky_1m_default.json b/doris/results/m6i.8xlarge_bluesky_1m.json similarity index 92% rename from doris/results/m6i.8xlarge_bluesky_1m_default.json rename to doris/results/m6i.8xlarge_bluesky_1m.json index a790400..f633513 100644 --- a/doris/results/m6i.8xlarge_bluesky_1m_default.json +++ b/doris/results/m6i.8xlarge_bluesky_1m.json @@ -1,5 +1,5 @@ { - "system": "Apache Doris (default)", + "system": "Apache Doris", "version": "doris-3.0.5-rc01-e277cfb83f", "os": "Ubuntu 24.04", "date": "2025-05-13", diff --git a/doris/results/m6i.8xlarge_bluesky_1m_materialized.json b/doris/results/m6i.8xlarge_bluesky_1m_materialized.json deleted file mode 100644 index be20c17..0000000 --- a/doris/results/m6i.8xlarge_bluesky_1m_materialized.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "system": "Apache Doris (materialized)", - "version": "doris-3.0.5-rc01-e277cfb83f", - "os": "Ubuntu 24.04", - "date": "2025-05-13", - "machine": "m6i.8xlarge, 10000gib gp3", - "retains_structure": "no", - "tags": [ - ], - "dataset_size": 1000000, - "num_loaded_documents": 1000000, - "total_size": 198731366, - "data_size": 198731366, - "result": [ - [0.03,0.02,0.02], - [0.05,0.04,0.04], - [0.03,0.02,0.03], - [0.02,0.03,0.02], - [0.02,0.02,0.02] - ] -}