From b14b76a6219c973e52dd08212d38b890f7b4c581 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 May 2025 15:13:16 +0000 Subject: [PATCH] Starrocks: Remove materialized results --- starrocks/create_and_load.sh | 16 ++++---- starrocks/{ddl_default.sql => ddl.sql} | 0 starrocks/ddl_materialized.sql | 12 ------ starrocks/main.sh | 37 +++++++------------ ...lt.json => m6i.8xlarge_bluesky_1000m.json} | 2 +- ...6i.8xlarge_bluesky_1000m_materialized.json | 20 ---------- ...ult.json => m6i.8xlarge_bluesky_100m.json} | 2 +- ...m6i.8xlarge_bluesky_100m_materialized.json | 20 ---------- ...ault.json => m6i.8xlarge_bluesky_10m.json} | 2 +- .../m6i.8xlarge_bluesky_10m_materialized.json | 20 ---------- ...fault.json => m6i.8xlarge_bluesky_1m.json} | 2 +- .../m6i.8xlarge_bluesky_1m_materialized.json | 20 ---------- 12 files changed, 25 insertions(+), 128 deletions(-) rename starrocks/{ddl_default.sql => ddl.sql} (100%) delete mode 100644 starrocks/ddl_materialized.sql rename starrocks/results/{m6i.8xlarge_bluesky_1000m_default.json => m6i.8xlarge_bluesky_1000m.json} (91%) delete mode 100644 starrocks/results/m6i.8xlarge_bluesky_1000m_materialized.json rename starrocks/results/{m6i.8xlarge_bluesky_100m_default.json => m6i.8xlarge_bluesky_100m.json} (91%) delete mode 100644 starrocks/results/m6i.8xlarge_bluesky_100m_materialized.json rename starrocks/results/{m6i.8xlarge_bluesky_10m_default.json => m6i.8xlarge_bluesky_10m.json} (91%) delete mode 100644 starrocks/results/m6i.8xlarge_bluesky_10m_materialized.json rename starrocks/results/{m6i.8xlarge_bluesky_1m_default.json => m6i.8xlarge_bluesky_1m.json} (91%) delete mode 100644 starrocks/results/m6i.8xlarge_bluesky_1m_materialized.json diff --git a/starrocks/create_and_load.sh b/starrocks/create_and_load.sh index e8d84bd..15fa017 100755 --- a/starrocks/create_and_load.sh +++ b/starrocks/create_and_load.sh @@ -1,22 +1,20 @@ #!/bin/bash # Check if the required arguments are provided -if [[ $# -lt 7 ]]; then - echo "Usage: $0 " +if [[ $# -lt 6 ]]; then + echo "Usage: $0 " exit 1 fi # Arguments DB_NAME="$1" TABLE_NAME="$2" -DDL_FILE="$3" -DATA_DIRECTORY="$4" -NUM_FILES="$5" -SUCCESS_LOG="$6" -ERROR_LOG="$7" +DATA_DIRECTORY="$3" +NUM_FILES="$4" +SUCCESS_LOG="$5" +ERROR_LOG="$6" # Validate arguments -[[ ! -f "$DDL_FILE" ]] && { echo "Error: DDL file '$DDL_FILE' does not exist."; exit 1; } [[ ! -d "$DATA_DIRECTORY" ]] && { echo "Error: Data directory '$DATA_DIRECTORY' does not exist."; exit 1; } [[ ! "$NUM_FILES" =~ ^[0-9]+$ ]] && { echo "Error: NUM_FILES must be a positive integer."; exit 1; } @@ -25,7 +23,7 @@ echo "Create database" mysql -P 9030 -h 127.0.0.1 -u root -e "CREATE DATABASE IF NOT EXISTS $DB_NAME" echo "Execute DDL" -mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME < "$DDL_FILE" +mysql -P 9030 -h 127.0.0.1 -u root $DB_NAME < "ddl.sql" echo "Load data" ./load_data.sh "$DATA_DIRECTORY" "$DB_NAME" "$TABLE_NAME" "$NUM_FILES" "$SUCCESS_LOG" "$ERROR_LOG" diff --git a/starrocks/ddl_default.sql b/starrocks/ddl.sql similarity index 100% rename from starrocks/ddl_default.sql rename to starrocks/ddl.sql diff --git a/starrocks/ddl_materialized.sql b/starrocks/ddl_materialized.sql deleted file mode 100644 index 7320925..0000000 --- a/starrocks/ddl_materialized.sql +++ /dev/null @@ -1,12 +0,0 @@ -CREATE TABLE bluesky ( - `id` BIGINT AUTO_INCREMENT, - -- Main JSON column (comes after key columns) - `data` JSON NULL COMMENT "Main JSON object", - -- Key columns (must come first in the schema and in the same order as DUPLICATE KEY) - `kind` VARCHAR(255) AS get_json_string(data, '$.kind'), - `operation` VARCHAR(255) AS get_json_string(data, '$.commit.operation'), - `collection` VARCHAR(255) AS get_json_string(data, '$.commit.collection'), - `did` VARCHAR(255) AS get_json_string(data, '$.did'), - `time_us` BIGINT AS get_json_int(data, '$.time_us') -) -ORDER BY(`kind`, `operation`, `collection`, `did`, `time_us`); diff --git a/starrocks/main.sh b/starrocks/main.sh index 4c5a1c2..cb8c8d8 100755 --- a/starrocks/main.sh +++ b/starrocks/main.sh @@ -36,47 +36,38 @@ fi; benchmark() { local size=$1 - local suffix=$2 # Check DATA_DIRECTORY contains the required number of files to run the benchmark file_count=$(find "$DATA_DIRECTORY" -type f | wc -l) if (( file_count < size )); then echo "Error: Not enough files in '$DATA_DIRECTORY'. Required: $size, Found: $file_count." exit 1 fi - ./create_and_load.sh "bluesky_${size}m_${suffix}" bluesky "ddl_${suffix}.sql" "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG" - ./total_size.sh "bluesky_${size}m_${suffix}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.total_size" - ./count.sh "bluesky_${size}m_${suffix}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.count" - ./physical_query_plans.sh "bluesky_${size}m_${suffix}" | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.physical_query_plans" - ./benchmark.sh "bluesky_${size}m_${suffix}" "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.results_runtime" "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.results_memory_usage" - ./drop_table.sh "bluesky_${size}m_${suffix}" bluesky + ./create_and_load.sh "bluesky_${size}m" bluesky "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG" + ./total_size.sh "bluesky_${size}m" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m.total_size" + ./count.sh "bluesky_${size}m" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m.count" + ./physical_query_plans.sh "bluesky_${size}m" | tee "${OUTPUT_PREFIX}_bluesky_${size}m.physical_query_plans" + ./benchmark.sh "bluesky_${size}m" "${OUTPUT_PREFIX}_bluesky_${size}m.results_runtime" "${OUTPUT_PREFIX}_bluesky_${size}m.results_memory_usage" + ./drop_table.sh "bluesky_${size}m" bluesky } case $CHOICE in 2) - benchmark 10 default - benchmark 10 materialized + benchmark 10 ;; 3) - benchmark 100 default - benchmark 100 materialized + benchmark 100 ;; 4) - benchmark 1000 default - benchmark 1000 materialized + benchmark 1000 ;; 5) - benchmark 1 materialized - benchmark 1 default - benchmark 10 materialized - benchmark 10 default - benchmark 100 materialized - benchmark 100 default - benchmark 1000 materialized - benchmark 1000 default + benchmark 1 + benchmark 10 + benchmark 100 + benchmark 1000 ;; *) - benchmark 1 materialized - benchmark 1 default + benchmark 1 ;; esac diff --git a/starrocks/results/m6i.8xlarge_bluesky_1000m_default.json b/starrocks/results/m6i.8xlarge_bluesky_1000m.json similarity index 91% rename from starrocks/results/m6i.8xlarge_bluesky_1000m_default.json rename to starrocks/results/m6i.8xlarge_bluesky_1000m.json index bf73dee..de425df 100644 --- a/starrocks/results/m6i.8xlarge_bluesky_1000m_default.json +++ b/starrocks/results/m6i.8xlarge_bluesky_1000m.json @@ -1,5 +1,5 @@ { - "system": "Starrocks (default)", + "system": "Starrocks", "version": "3.4.0-e94580b", "os": "Ubuntu 24.04", "date": "2025-03-24", diff --git a/starrocks/results/m6i.8xlarge_bluesky_1000m_materialized.json b/starrocks/results/m6i.8xlarge_bluesky_1000m_materialized.json deleted file mode 100644 index d85da6e..0000000 --- a/starrocks/results/m6i.8xlarge_bluesky_1000m_materialized.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "system": "Starrocks (materialized)", - "version": "3.4.0-e94580b", - "os": "Ubuntu 24.04", - "date": "2025-03-24", - "machine": "m6i.8xlarge, 10000gib gp3", - "retains_structure": "yes", - "tags": [ - ], - "dataset_size": 1000000000, - "num_loaded_documents": 997000000, - "total_size": 191541000000, - "result": [ - [1.75,1.56,1.54], - [49.75,41.61,31.38], - [12.90,12.58,5.76], - [5.64,6.21,6.03], - [null, null, null] - ] -} diff --git a/starrocks/results/m6i.8xlarge_bluesky_100m_default.json b/starrocks/results/m6i.8xlarge_bluesky_100m.json similarity index 91% rename from starrocks/results/m6i.8xlarge_bluesky_100m_default.json rename to starrocks/results/m6i.8xlarge_bluesky_100m.json index fc054d5..c40d531 100644 --- a/starrocks/results/m6i.8xlarge_bluesky_100m_default.json +++ b/starrocks/results/m6i.8xlarge_bluesky_100m.json @@ -1,5 +1,5 @@ { - "system": "Starrocks (default)", + "system": "Starrocks", "version": "3.4.0-e94580b", "os": "Ubuntu 24.04", "date": "2025-03-24", diff --git a/starrocks/results/m6i.8xlarge_bluesky_100m_materialized.json b/starrocks/results/m6i.8xlarge_bluesky_100m_materialized.json deleted file mode 100644 index 16adf68..0000000 --- a/starrocks/results/m6i.8xlarge_bluesky_100m_materialized.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "system": "Starrocks (materialized)", - "version": "3.4.0-e94580b", - "os": "Ubuntu 24.04", - "date": "2025-03-24", - "machine": "m6i.8xlarge, 10000gib gp3", - "retains_structure": "yes", - "tags": [ - ], - "dataset_size": 100000000, - "num_loaded_documents": 100000000, - "total_size": 16190000000, - "result": [ - [0.21,0.17,0.18], - [8.38,2.19,2.17], - [2.16,1.10,1.06], - [6.62,0.43,0.45], - [0.48,0.48,0.49] - ] -} diff --git a/starrocks/results/m6i.8xlarge_bluesky_10m_default.json b/starrocks/results/m6i.8xlarge_bluesky_10m.json similarity index 91% rename from starrocks/results/m6i.8xlarge_bluesky_10m_default.json rename to starrocks/results/m6i.8xlarge_bluesky_10m.json index eb6f38c..8bb6970 100644 --- a/starrocks/results/m6i.8xlarge_bluesky_10m_default.json +++ b/starrocks/results/m6i.8xlarge_bluesky_10m.json @@ -1,5 +1,5 @@ { - "system": "Starrocks (default)", + "system": "Starrocks", "version": "3.4.0-e94580b", "os": "Ubuntu 24.04", "date": "2025-03-24", diff --git a/starrocks/results/m6i.8xlarge_bluesky_10m_materialized.json b/starrocks/results/m6i.8xlarge_bluesky_10m_materialized.json deleted file mode 100644 index d4d7fe3..0000000 --- a/starrocks/results/m6i.8xlarge_bluesky_10m_materialized.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "system": "Starrocks (materialized)", - "version": "3.4.0-e94580b", - "os": "Ubuntu 24.04", - "date": "2025-03-24", - "machine": "m6i.8xlarge, 10000gib gp3", - "retains_structure": "yes", - "tags": [ - ], - "dataset_size": 10000000, - "num_loaded_documents": 10000000, - "total_size": 616175000, - "result": [ - [0.09,0.13,0.12], - [0.34,0.33,0.33], - [0.22,0.20,0.26], - [0.11,0.10,0.10], - [0.11,0.10,0.11] - ] -} diff --git a/starrocks/results/m6i.8xlarge_bluesky_1m_default.json b/starrocks/results/m6i.8xlarge_bluesky_1m.json similarity index 91% rename from starrocks/results/m6i.8xlarge_bluesky_1m_default.json rename to starrocks/results/m6i.8xlarge_bluesky_1m.json index 238b38a..5635826 100644 --- a/starrocks/results/m6i.8xlarge_bluesky_1m_default.json +++ b/starrocks/results/m6i.8xlarge_bluesky_1m.json @@ -1,5 +1,5 @@ { - "system": "Starrocks (default)", + "system": "Starrocks", "version": "3.4.0-e94580b", "os": "Ubuntu 24.04", "date": "2025-03-24", diff --git a/starrocks/results/m6i.8xlarge_bluesky_1m_materialized.json b/starrocks/results/m6i.8xlarge_bluesky_1m_materialized.json deleted file mode 100644 index f9e6e33..0000000 --- a/starrocks/results/m6i.8xlarge_bluesky_1m_materialized.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "system": "Starrocks (materialized)", - "version": "3.4.0-e94580b", - "os": "Ubuntu 24.04", - "date": "2025-03-24", - "machine": "m6i.8xlarge, 10000gib gp3", - "retains_structure": "yes", - "tags": [ - ], - "dataset_size": 1000000, - "num_loaded_documents": 1000000, - "total_size": 1, - "result": [ - [0.06,0.05,0.04], - [0.13,0.06,0.08], - [0.14,0.13,0.13], - [0.12,0.05,0.05], - [0.05,0.05,0.05] - ] -}