From 1fbea63a991abbf2d68f34527b97c55d580627dd Mon Sep 17 00:00:00 2001 From: Murphy Date: Fri, 21 Mar 2025 07:23:41 +0800 Subject: [PATCH 1/5] optimize for starrocks --- starrocks/ddl_flat.sql | 4 +++ starrocks/queries.sql | 10 ++++---- starrocks/queries_formatted.sql | 44 ++++++++++++++++----------------- 3 files changed, 31 insertions(+), 27 deletions(-) create mode 100644 starrocks/ddl_flat.sql diff --git a/starrocks/ddl_flat.sql b/starrocks/ddl_flat.sql new file mode 100644 index 0000000..1747cb6 --- /dev/null +++ b/starrocks/ddl_flat.sql @@ -0,0 +1,4 @@ +CREATE TABLE bluesky ( + `id` BIGINT AUTO_INCREMENT, + `data` JSON NULL COMMENT "Main JSON object" +); \ No newline at end of file diff --git a/starrocks/queries.sql b/starrocks/queries.sql index 9a3e6f6..0009b4d 100644 --- a/starrocks/queries.sql +++ b/starrocks/queries.sql @@ -1,5 +1,5 @@ -SELECT cast(data->'commit.collection' AS VARCHAR) AS event,count() AS count FROM bluesky GROUP BY event ORDER BY count DESC; -SELECT cast(data->'commit.collection' AS VARCHAR) AS event, count() AS count, count(DISTINCT cast(data->'did' AS VARCHAR)) AS users FROM bluesky WHERE (data->'kind' = 'commit') AND (data->'commit.operation' = 'create') GROUP BY event ORDER BY count DESC; -SELECT cast(data->'commit.collection' AS VARCHAR) AS event, hour(from_unixtime(round(divide(cast(data->'time_us' AS BIGINT), 1000000)))) as hour_of_day, count() AS count FROM bluesky WHERE (data->'kind' = 'commit') AND (data->'commit.operation' = 'create') AND (array_contains(['app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like'], cast(data->'commit.collection' AS VARCHAR))) GROUP BY event, hour_of_day ORDER BY hour_of_day, event; -SELECT cast(data->'$.did' as VARCHAR) as user_id, min(from_unixtime(round(divide(cast(data->'time_us' AS BIGINT), 1000000)))) AS first_post_date FROM bluesky WHERE (data->'kind' = 'commit') AND (data->'commit.operation' = 'create') AND (data->'commit.collection' = 'app.bsky.feed.post') GROUP BY user_id ORDER BY first_post_date ASC LIMIT 3; -SELECT cast(data->'$.did' as VARCHAR) as user_id, date_diff('millisecond', min(from_unixtime(round(divide(cast(data->'time_us' AS BIGINT), 1000000)))),max(from_unixtime(round(divide(cast(data->'time_us' AS BIGINT), 1000000))))) AS activity_span FROM bluesky WHERE (data->'kind' = 'commit') AND (data->'commit.operation' = 'create') AND (data->'commit.collection' = 'app.bsky.feed.post') GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; +SELECT get_json_string(data, 'commit.collection') AS event, count() AS count FROM bluesky GROUP BY event ORDER BY count DESC; +SELECT get_json_string(data, 'commit.collection') AS event, count() AS count, count(DISTINCT get_json_string(data, 'did')) AS users FROM bluesky WHERE (get_json_string(data, 'kind') = 'commit') AND (get_json_string(data, 'commit.operation') = 'create') GROUP BY event ORDER BY count DESC; +SELECT get_json_string(data, 'commit.collection') AS event, hour(from_unixtime(round(divide(get_json_int(data, 'time_us'), 1000000)))) as hour_of_day, count() AS count FROM bluesky WHERE (get_json_string(data, 'kind') = 'commit') AND (get_json_string(data, 'commit.operation') = 'create') AND (array_contains(['app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like'], get_json_string(data, 'commit.collection'))) GROUP BY event, hour_of_day ORDER BY hour_of_day, event; +SELECT get_json_string(data, '$.did') as user_id, min(from_unixtime(round(divide(get_json_int(data, 'time_us'), 1000000)))) AS first_post_date FROM bluesky WHERE (get_json_string(data, 'kind') = 'commit') AND (get_json_string(data, 'commit.operation') = 'create') AND (get_json_string(data, 'commit.collection') = 'app.bsky.feed.post') GROUP BY user_id ORDER BY first_post_date ASC LIMIT 3; +SELECT get_json_string(data, '$.did') as user_id, date_diff('millisecond', min(from_unixtime(round(divide(get_json_int(data, 'time_us'), 1000000)))), max(from_unixtime(round(divide(get_json_int(data, 'time_us'), 1000000))))) AS activity_span FROM bluesky WHERE (get_json_string(data, 'kind') = 'commit') AND (get_json_string(data, 'commit.operation') = 'create') AND (get_json_string(data, 'commit.collection') = 'app.bsky.feed.post') GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; \ No newline at end of file diff --git a/starrocks/queries_formatted.sql b/starrocks/queries_formatted.sql index b549847..16973b5 100644 --- a/starrocks/queries_formatted.sql +++ b/starrocks/queries_formatted.sql @@ -2,7 +2,7 @@ -- Q1 - Top event types ------------------------------------------------------------------------------------------------------------------------ -SELECT cast(data->'commit.collection' AS VARCHAR) AS event, +SELECT get_json_string(data, 'commit.collection') AS event, count() AS count FROM bluesky GROUP BY event @@ -12,12 +12,12 @@ ORDER BY count DESC; -- Q2 - Top event types together with unique users per event type ------------------------------------------------------------------------------------------------------------------------ SELECT - cast(data->'commit.collection' AS VARCHAR) AS event, + get_json_string(data, 'commit.collection') AS event, count() AS count, - count(DISTINCT cast(data->'did' AS VARCHAR)) AS users + count(DISTINCT get_json_string(data, 'did')) AS users FROM bluesky -WHERE (data->'kind' = 'commit') - AND (data->'commit.operation' = 'create') +WHERE (get_json_string(data, 'kind') = 'commit') + AND (get_json_string(data, 'commit.operation') = 'create') GROUP BY event ORDER BY count DESC; @@ -25,13 +25,13 @@ ORDER BY count DESC; -- Q3 - When do people use BlueSky ------------------------------------------------------------------------------------------------------------------------ SELECT - cast(data->'commit.collection' AS VARCHAR) AS event, - hour(from_unixtime(round(divide(cast(data->'time_us' AS BIGINT), 1000000)))) as hour_of_day, + get_json_string(data, 'commit.collection') AS event, + hour(from_unixtime(round(divide(get_json_int(data, 'time_us'), 1000000)))) as hour_of_day, count() AS count FROM bluesky -WHERE (data->'kind' = 'commit') -AND (data->'commit.operation' = 'create') -AND (array_contains(['app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like'], cast(data->'commit.collection' AS VARCHAR))) +WHERE (get_json_string(data, 'kind') = 'commit') +AND (get_json_string(data, 'commit.operation') = 'create') +AND (array_contains(['app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like'], get_json_string(data, 'commit.collection'))) GROUP BY event, hour_of_day ORDER BY hour_of_day, event; @@ -39,28 +39,28 @@ ORDER BY hour_of_day, event; -- Q4 - top 3 post veterans ------------------------------------------------------------------------------------------------------------------------ SELECT - cast(data->'$.did' as VARCHAR) as user_id, - min(from_unixtime(round(divide(cast(data->'time_us' AS BIGINT), 1000000)))) AS first_post_date + get_json_string(data, '$.did') as user_id, + min(from_unixtime(round(divide(get_json_int(data, 'time_us'), 1000000)))) AS first_post_date FROM bluesky -WHERE (data->'kind' = 'commit') - AND (data->'commit.operation' = 'create') - AND (data->'commit.collection' = 'app.bsky.feed.post') +WHERE (get_json_string(data, 'kind') = 'commit') + AND (get_json_string(data, 'commit.operation') = 'create') + AND (get_json_string(data, 'commit.collection') = 'app.bsky.feed.post') GROUP BY user_id -ORDER BY first_post_ts ASC +ORDER BY first_post_date ASC LIMIT 3; ------------------------------------------------------------------------------------------------------------------------ -- Q5 - top 3 users with longest activity ------------------------------------------------------------------------------------------------------------------------ SELECT - cast(data->'$.did' as VARCHAR) as user_id, + get_json_string(data, '$.did') as user_id, date_diff('millisecond', - min(from_unixtime(round(divide(cast(data->'time_us' AS BIGINT), 1000000)))), - max(from_unixtime(round(divide(cast(data->'time_us' AS BIGINT), 1000000))))) AS activity_span + min(from_unixtime(round(divide(get_json_int(data, 'time_us'), 1000000)))), + max(from_unixtime(round(divide(get_json_int(data, 'time_us'), 1000000))))) AS activity_span FROM bluesky -WHERE (data->'kind' = 'commit') - AND (data->'commit.operation' = 'create') - AND (data->'commit.collection' = 'app.bsky.feed.post') +WHERE (get_json_string(data, 'kind') = 'commit') + AND (get_json_string(data, 'commit.operation') = 'create') + AND (get_json_string(data, 'commit.collection') = 'app.bsky.feed.post') GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; From caa34dedba9afd22a7c098a0bc7552447b98ff97 Mon Sep 17 00:00:00 2001 From: Murphy Date: Fri, 21 Mar 2025 23:55:59 +0800 Subject: [PATCH 2/5] incldue ddl_flat in main.sh Signed-off-by: Murphy --- starrocks/main.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/starrocks/main.sh b/starrocks/main.sh index d825140..8123736 100755 --- a/starrocks/main.sh +++ b/starrocks/main.sh @@ -50,27 +50,35 @@ case $choice in 2) benchmark 10 lz4 benchmark 10 zstd + benchmark 10 flat ;; 3) benchmark 100 lz4 benchmark 100 zstd + benchmark 100 flat ;; 4) benchmark 1000 lz4 benchmark 1000 zstd + benchmark 1000 flat ;; 5) benchmark 1 lz4 benchmark 1 zstd + benchmark 1 flat benchmark 10 lz4 benchmark 10 zstd + benchmark 10 flat benchmark 100 lz4 benchmark 100 zstd + benchmark 100 flat benchmark 1000 lz4 benchmark 1000 zstd + benchmark 1000 flat ;; *) benchmark 1 lz4 benchmark 1 zstd + benchmark 1 flat ;; esac From 5433550d42b860fb20ad338af58f5bb851505e4e Mon Sep 17 00:00:00 2001 From: Murphy Date: Sat, 22 Mar 2025 00:23:21 +0800 Subject: [PATCH 3/5] add empty line in queries --- starrocks/queries.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/starrocks/queries.sql b/starrocks/queries.sql index 0009b4d..f126823 100644 --- a/starrocks/queries.sql +++ b/starrocks/queries.sql @@ -2,4 +2,4 @@ SELECT get_json_string(data, 'commit.collection') AS event, count() AS count FRO SELECT get_json_string(data, 'commit.collection') AS event, count() AS count, count(DISTINCT get_json_string(data, 'did')) AS users FROM bluesky WHERE (get_json_string(data, 'kind') = 'commit') AND (get_json_string(data, 'commit.operation') = 'create') GROUP BY event ORDER BY count DESC; SELECT get_json_string(data, 'commit.collection') AS event, hour(from_unixtime(round(divide(get_json_int(data, 'time_us'), 1000000)))) as hour_of_day, count() AS count FROM bluesky WHERE (get_json_string(data, 'kind') = 'commit') AND (get_json_string(data, 'commit.operation') = 'create') AND (array_contains(['app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like'], get_json_string(data, 'commit.collection'))) GROUP BY event, hour_of_day ORDER BY hour_of_day, event; SELECT get_json_string(data, '$.did') as user_id, min(from_unixtime(round(divide(get_json_int(data, 'time_us'), 1000000)))) AS first_post_date FROM bluesky WHERE (get_json_string(data, 'kind') = 'commit') AND (get_json_string(data, 'commit.operation') = 'create') AND (get_json_string(data, 'commit.collection') = 'app.bsky.feed.post') GROUP BY user_id ORDER BY first_post_date ASC LIMIT 3; -SELECT get_json_string(data, '$.did') as user_id, date_diff('millisecond', min(from_unixtime(round(divide(get_json_int(data, 'time_us'), 1000000)))), max(from_unixtime(round(divide(get_json_int(data, 'time_us'), 1000000))))) AS activity_span FROM bluesky WHERE (get_json_string(data, 'kind') = 'commit') AND (get_json_string(data, 'commit.operation') = 'create') AND (get_json_string(data, 'commit.collection') = 'app.bsky.feed.post') GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; \ No newline at end of file +SELECT get_json_string(data, '$.did') as user_id, date_diff('millisecond', min(from_unixtime(round(divide(get_json_int(data, 'time_us'), 1000000)))), max(from_unixtime(round(divide(get_json_int(data, 'time_us'), 1000000))))) AS activity_span FROM bluesky WHERE (get_json_string(data, 'kind') = 'commit') AND (get_json_string(data, 'commit.operation') = 'create') AND (get_json_string(data, 'commit.collection') = 'app.bsky.feed.post') GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; From 815d991b22c6495331645a36b84804adec625501 Mon Sep 17 00:00:00 2001 From: Murphy Date: Mon, 24 Mar 2025 00:17:49 +0800 Subject: [PATCH 4/5] rename --- starrocks/ddl_default.sql | 4 ++ starrocks/ddl_flat.sql | 4 -- .../{ddl_lz4.sql => ddl_materialized.sql} | 2 +- starrocks/ddl_zstd.sql | 15 ------- starrocks/main.sh | 40 ++++++++----------- 5 files changed, 21 insertions(+), 44 deletions(-) create mode 100644 starrocks/ddl_default.sql delete mode 100644 starrocks/ddl_flat.sql rename starrocks/{ddl_lz4.sql => ddl_materialized.sql} (97%) delete mode 100644 starrocks/ddl_zstd.sql diff --git a/starrocks/ddl_default.sql b/starrocks/ddl_default.sql new file mode 100644 index 0000000..c27de83 --- /dev/null +++ b/starrocks/ddl_default.sql @@ -0,0 +1,4 @@ +CREATE TABLE bluesky ( + `id` BIGINT AUTO_INCREMENT, + `data` JSON NOT NULL COMMENT "Primary JSON object, optimized for field access using FlatJSON" +); \ No newline at end of file diff --git a/starrocks/ddl_flat.sql b/starrocks/ddl_flat.sql deleted file mode 100644 index 1747cb6..0000000 --- a/starrocks/ddl_flat.sql +++ /dev/null @@ -1,4 +0,0 @@ -CREATE TABLE bluesky ( - `id` BIGINT AUTO_INCREMENT, - `data` JSON NULL COMMENT "Main JSON object" -); \ No newline at end of file diff --git a/starrocks/ddl_lz4.sql b/starrocks/ddl_materialized.sql similarity index 97% rename from starrocks/ddl_lz4.sql rename to starrocks/ddl_materialized.sql index 95916c8..7320925 100644 --- a/starrocks/ddl_lz4.sql +++ b/starrocks/ddl_materialized.sql @@ -8,5 +8,5 @@ CREATE TABLE bluesky ( `collection` VARCHAR(255) AS get_json_string(data, '$.commit.collection'), `did` VARCHAR(255) AS get_json_string(data, '$.did'), `time_us` BIGINT AS get_json_int(data, '$.time_us') -) ENGINE=OLAP +) ORDER BY(`kind`, `operation`, `collection`, `did`, `time_us`); diff --git a/starrocks/ddl_zstd.sql b/starrocks/ddl_zstd.sql deleted file mode 100644 index e96786e..0000000 --- a/starrocks/ddl_zstd.sql +++ /dev/null @@ -1,15 +0,0 @@ -CREATE TABLE bluesky ( - `id` BIGINT AUTO_INCREMENT, - -- Main JSON column (comes after key columns) - `data` JSON NULL COMMENT "Main JSON object", - -- Key columns (must come first in the schema and in the same order as ORDER BY) - `kind` VARCHAR(255) AS get_json_string(data, '$.kind'), - `operation` VARCHAR(255) AS get_json_string(data, '$.commit.operation'), - `collection` VARCHAR(255) AS get_json_string(data, '$.commit.collection'), - `did` VARCHAR(255) AS get_json_string(data, '$.did'), - `time_us` BIGINT AS get_json_int(data, '$.time_us') -) ENGINE=OLAP -ORDER BY(`kind`, `operation`, `collection`, `did`, `time_us`) -PROPERTIES ( -"compression" = "ZSTD" -); diff --git a/starrocks/main.sh b/starrocks/main.sh index b22e603..7d9d2bf 100755 --- a/starrocks/main.sh +++ b/starrocks/main.sh @@ -53,38 +53,30 @@ benchmark() { case $choice in 2) - benchmark 10 lz4 - benchmark 10 zstd - benchmark 10 flat + benchmark 10 default + benchmark 10 materialized ;; 3) - benchmark 100 lz4 - benchmark 100 zstd - benchmark 100 flat + benchmark 100 default + benchmark 100 materialized ;; 4) - benchmark 1000 lz4 - benchmark 1000 zstd - benchmark 1000 flat + benchmark 1000 default + benchmark 1000 materialized ;; 5) - benchmark 1 lz4 - benchmark 1 zstd - benchmark 1 flat - benchmark 10 lz4 - benchmark 10 zstd - benchmark 10 flat - benchmark 100 lz4 - benchmark 100 zstd - benchmark 100 flat - benchmark 1000 lz4 - benchmark 1000 zstd - benchmark 1000 flat + benchmark 1 materialized + benchmark 1 default + benchmark 10 materialized + benchmark 10 default + benchmark 100 materialized + benchmark 100 default + benchmark 1000 materialized + benchmark 1000 default ;; *) - benchmark 1 lz4 - benchmark 1 zstd - benchmark 1 flat + benchmark 1 materialized + benchmark 1 default ;; esac From 0da5e4805a97a644a1b1851bdc76251d11cb8d83 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 24 Mar 2025 21:25:06 +0000 Subject: [PATCH 5/5] Add measurements --- ... => m6i.8xlarge_bluesky_1000m_default.json} | 14 +++++++------- ...6i.8xlarge_bluesky_1000m_materialized.json} | 16 ++++++++-------- ...n => m6i.8xlarge_bluesky_100m_default.json} | 18 +++++++++--------- ...m6i.8xlarge_bluesky_100m_materialized.json} | 16 ++++++++-------- ...on => m6i.8xlarge_bluesky_10m_default.json} | 18 +++++++++--------- ... m6i.8xlarge_bluesky_10m_materialized.json} | 16 ++++++++-------- ...son => m6i.8xlarge_bluesky_1m_default.json} | 16 ++++++++-------- ...> m6i.8xlarge_bluesky_1m_materialized.json} | 16 ++++++++-------- 8 files changed, 65 insertions(+), 65 deletions(-) rename starrocks/results/{m6i.8xlarge_bluesky_1000m_lz4.json => m6i.8xlarge_bluesky_1000m_default.json} (61%) rename starrocks/results/{m6i.8xlarge_bluesky_1000m_zstd.json => m6i.8xlarge_bluesky_1000m_materialized.json} (56%) rename starrocks/results/{m6i.8xlarge_bluesky_100m_lz4.json => m6i.8xlarge_bluesky_100m_default.json} (52%) rename starrocks/results/{m6i.8xlarge_bluesky_100m_zstd.json => m6i.8xlarge_bluesky_100m_materialized.json} (59%) rename starrocks/results/{m6i.8xlarge_bluesky_10m_lz4.json => m6i.8xlarge_bluesky_10m_default.json} (53%) rename starrocks/results/{m6i.8xlarge_bluesky_10m_zstd.json => m6i.8xlarge_bluesky_10m_materialized.json} (59%) rename starrocks/results/{m6i.8xlarge_bluesky_1m_lz4.json => m6i.8xlarge_bluesky_1m_default.json} (60%) rename starrocks/results/{m6i.8xlarge_bluesky_1m_zstd.json => m6i.8xlarge_bluesky_1m_materialized.json} (60%) diff --git a/starrocks/results/m6i.8xlarge_bluesky_1000m_lz4.json b/starrocks/results/m6i.8xlarge_bluesky_1000m_default.json similarity index 61% rename from starrocks/results/m6i.8xlarge_bluesky_1000m_lz4.json rename to starrocks/results/m6i.8xlarge_bluesky_1000m_default.json index 9028461..93d2f1e 100644 --- a/starrocks/results/m6i.8xlarge_bluesky_1000m_lz4.json +++ b/starrocks/results/m6i.8xlarge_bluesky_1000m_default.json @@ -1,8 +1,8 @@ { - "system": "Starrocks (lz4)", + "system": "Starrocks (default)", "version": "3.4.0-e94580b", "os": "Ubuntu 24.04", - "date": "2025-01-13", + "date": "2025-03-24", "machine": "m6i.8xlarge, 10000gib gp3", "cluster_size": 1, "comment": "", @@ -10,14 +10,14 @@ "tags": [ ], "dataset_size": 1000000000, - "num_loaded_documents": null, + "num_loaded_documents": 804000000, "data_compression": "lz4", "total_size": null, "result": [ - [null, null, null], - [null, null, null], - [null, null, null], - [null, null, null], + [2.27,1.24,1.21], + [17.81,10.67,10.20], + [7.38,6.78,7.62], + [7.24, null, null], [null, null, null] ] } diff --git a/starrocks/results/m6i.8xlarge_bluesky_1000m_zstd.json b/starrocks/results/m6i.8xlarge_bluesky_1000m_materialized.json similarity index 56% rename from starrocks/results/m6i.8xlarge_bluesky_1000m_zstd.json rename to starrocks/results/m6i.8xlarge_bluesky_1000m_materialized.json index 8e71e36..7d6d7da 100644 --- a/starrocks/results/m6i.8xlarge_bluesky_1000m_zstd.json +++ b/starrocks/results/m6i.8xlarge_bluesky_1000m_materialized.json @@ -1,8 +1,8 @@ { - "system": "Starrocks (zstd)", + "system": "Starrocks (materialized)", "version": "3.4.0-e94580b", "os": "Ubuntu 24.04", - "date": "2025-01-13", + "date": "2025-03-24", "machine": "m6i.8xlarge, 10000gib gp3", "cluster_size": 1, "comment": "", @@ -10,14 +10,14 @@ "tags": [ ], "dataset_size": 1000000000, - "num_loaded_documents": null, + "num_loaded_documents": 997000000, "data_compression": "zstd", - "total_size": null, + "total_size": 191541000000, "result": [ - [null, null, null], - [null, null, null], - [null, null, null], - [null, null, null], + [1.75,1.56,1.54], + [49.75,41.61,31.38], + [12.90,12.58,5.76], + [5.64,6.21,6.03], [null, null, null] ] } diff --git a/starrocks/results/m6i.8xlarge_bluesky_100m_lz4.json b/starrocks/results/m6i.8xlarge_bluesky_100m_default.json similarity index 52% rename from starrocks/results/m6i.8xlarge_bluesky_100m_lz4.json rename to starrocks/results/m6i.8xlarge_bluesky_100m_default.json index 7d5f3f9..a115d5e 100644 --- a/starrocks/results/m6i.8xlarge_bluesky_100m_lz4.json +++ b/starrocks/results/m6i.8xlarge_bluesky_100m_default.json @@ -1,8 +1,8 @@ { - "system": "Starrocks (lz4)", + "system": "Starrocks (default)", "version": "3.4.0-e94580b", "os": "Ubuntu 24.04", - "date": "2025-01-13", + "date": "2025-03-24", "machine": "m6i.8xlarge, 10000gib gp3", "cluster_size": 1, "comment": "", @@ -10,14 +10,14 @@ "tags": [ ], "dataset_size": 100000000, - "num_loaded_documents": 100000000, + "num_loaded_documents": 91000000, "data_compression": "lz4", - "total_size": 19182000000, + "total_size": 17109000000, "result": [ - [0.25,0.17,0.17], - [8.13,4.33,3.82], - [3.18,3.08,3.05], - [4.06,4.07,4.12], - [4.04,4.20,3.97] + [0.61,0.16,0.16], + [19.26,7.12,7.18], + [1.12,1.08,1.08], + [0.55,0.55,0.54], + [0.60,0.60,0.60] ] } diff --git a/starrocks/results/m6i.8xlarge_bluesky_100m_zstd.json b/starrocks/results/m6i.8xlarge_bluesky_100m_materialized.json similarity index 59% rename from starrocks/results/m6i.8xlarge_bluesky_100m_zstd.json rename to starrocks/results/m6i.8xlarge_bluesky_100m_materialized.json index ba42749..5338f07 100644 --- a/starrocks/results/m6i.8xlarge_bluesky_100m_zstd.json +++ b/starrocks/results/m6i.8xlarge_bluesky_100m_materialized.json @@ -1,8 +1,8 @@ { - "system": "Starrocks (zstd)", + "system": "Starrocks (materialized)", "version": "3.4.0-e94580b", "os": "Ubuntu 24.04", - "date": "2025-01-13", + "date": "2025-03-24", "machine": "m6i.8xlarge, 10000gib gp3", "cluster_size": 1, "comment": "", @@ -12,12 +12,12 @@ "dataset_size": 100000000, "num_loaded_documents": 100000000, "data_compression": "zstd", - "total_size": 31200000000, + "total_size": 16190000000, "result": [ - [0.22,0.17,0.18], - [28.09,3.94,3.89], - [3.04,3.05,3.11], - [3.99,4.04,3.94], - [4.13,4.12,4.11] + [0.21,0.17,0.18], + [8.38,2.19,2.17], + [2.16,1.10,1.06], + [6.62,0.43,0.45], + [0.48,0.48,0.49] ] } diff --git a/starrocks/results/m6i.8xlarge_bluesky_10m_lz4.json b/starrocks/results/m6i.8xlarge_bluesky_10m_default.json similarity index 53% rename from starrocks/results/m6i.8xlarge_bluesky_10m_lz4.json rename to starrocks/results/m6i.8xlarge_bluesky_10m_default.json index 5545fb0..030a850 100644 --- a/starrocks/results/m6i.8xlarge_bluesky_10m_lz4.json +++ b/starrocks/results/m6i.8xlarge_bluesky_10m_default.json @@ -1,8 +1,8 @@ { - "system": "Starrocks (lz4)", + "system": "Starrocks (default)", "version": "3.4.0-e94580b", "os": "Ubuntu 24.04", - "date": "2025-01-13", + "date": "2025-03-24", "machine": "m6i.8xlarge, 10000gib gp3", "cluster_size": 1, "comment": "", @@ -10,14 +10,14 @@ "tags": [ ], "dataset_size": 10000000, - "num_loaded_documents": 9999994, + "num_loaded_documents": 7000000, "data_compression": "lz4", - "total_size": 1967000000, + "total_size": 824028000, "result": [ - [0.11,0.10,0.10], - [0.45,0.40,0.42], - [0.58,0.45,0.50], - [0.57,0.62,0.61], - [0.69,0.60,0.55] + [0.03,0.02,0.03], + [0.52,0.50,0.48], + [0.25,0.25,0.18], + [0.11,0.11,0.10], + [0.11,0.12,0.12] ] } diff --git a/starrocks/results/m6i.8xlarge_bluesky_10m_zstd.json b/starrocks/results/m6i.8xlarge_bluesky_10m_materialized.json similarity index 59% rename from starrocks/results/m6i.8xlarge_bluesky_10m_zstd.json rename to starrocks/results/m6i.8xlarge_bluesky_10m_materialized.json index 359ddea..a75315b 100644 --- a/starrocks/results/m6i.8xlarge_bluesky_10m_zstd.json +++ b/starrocks/results/m6i.8xlarge_bluesky_10m_materialized.json @@ -1,8 +1,8 @@ { - "system": "Starrocks (zstd)", + "system": "Starrocks (materialized)", "version": "3.4.0-e94580b", "os": "Ubuntu 24.04", - "date": "2025-01-13", + "date": "2025-03-24", "machine": "m6i.8xlarge, 10000gib gp3", "cluster_size": 1, "comment": "", @@ -12,12 +12,12 @@ "dataset_size": 10000000, "num_loaded_documents": 10000000, "data_compression": "zstd", - "total_size": 3193000000, + "total_size": 616175000, "result": [ - [0.10,0.12,0.09], - [1.79,0.43,0.37], - [0.48,0.45,0.47], - [0.57,0.70,0.62], - [0.59,0.72,0.73] + [0.09,0.13,0.12], + [0.34,0.33,0.33], + [0.22,0.20,0.26], + [0.11,0.10,0.10], + [0.11,0.10,0.11] ] } diff --git a/starrocks/results/m6i.8xlarge_bluesky_1m_lz4.json b/starrocks/results/m6i.8xlarge_bluesky_1m_default.json similarity index 60% rename from starrocks/results/m6i.8xlarge_bluesky_1m_lz4.json rename to starrocks/results/m6i.8xlarge_bluesky_1m_default.json index eaf2f85..d4828d5 100644 --- a/starrocks/results/m6i.8xlarge_bluesky_1m_lz4.json +++ b/starrocks/results/m6i.8xlarge_bluesky_1m_default.json @@ -1,8 +1,8 @@ { - "system": "Starrocks (lz4)", + "system": "Starrocks (default)", "version": "3.4.0-e94580b", "os": "Ubuntu 24.04", - "date": "2025-01-13", + "date": "2025-03-24", "machine": "m6i.8xlarge, 10000gib gp3", "cluster_size": 1, "comment": "", @@ -12,12 +12,12 @@ "dataset_size": 1000000, "num_loaded_documents": 1000000, "data_compression": "lz4", - "total_size": 201845000, + "total_size": 1, "result": [ - [0.65,0.05,0.05], - [0.36,0.28,0.29], - [0.31,0.28,0.28], - [0.52,0.52,0.51], - [0.51,0.51,0.52] + [0.05,0.05,0.06], + [0.19,0.08,0.07], + [0.13,0.13,0.13], + [0.07,0.07,0.07], + [0.07,0.07,0.07] ] } diff --git a/starrocks/results/m6i.8xlarge_bluesky_1m_zstd.json b/starrocks/results/m6i.8xlarge_bluesky_1m_materialized.json similarity index 60% rename from starrocks/results/m6i.8xlarge_bluesky_1m_zstd.json rename to starrocks/results/m6i.8xlarge_bluesky_1m_materialized.json index 17840a1..b53d191 100644 --- a/starrocks/results/m6i.8xlarge_bluesky_1m_zstd.json +++ b/starrocks/results/m6i.8xlarge_bluesky_1m_materialized.json @@ -1,8 +1,8 @@ { - "system": "Starrocks (zstd)", + "system": "Starrocks (materialized)", "version": "3.4.0-e94580b", "os": "Ubuntu 24.04", - "date": "2025-01-13", + "date": "2025-03-24", "machine": "m6i.8xlarge, 10000gib gp3", "cluster_size": 1, "comment": "", @@ -12,12 +12,12 @@ "dataset_size": 1000000, "num_loaded_documents": 1000000, "data_compression": "zstd", - "total_size": 322945000, + "total_size": 1, "result": [ - [0.05,0.05,0.04], - [0.37,0.28,0.29], - [0.28,0.27,0.27], - [0.51,0.51,0.51], - [0.50,0.50,0.51] + [0.06,0.05,0.04], + [0.13,0.06,0.08], + [0.14,0.13,0.13], + [0.12,0.05,0.05], + [0.05,0.05,0.05] ] }