diff --git a/README.md b/README.md index dcd19ad..9ec8214 100644 --- a/README.md +++ b/README.md @@ -138,6 +138,7 @@ While the main benchmark uses a specific machine configuration for reproducibili - [x] SingleStore - [x] GreptimeDB - [x] FerretDB +- [x] Apache Doris - [ ] Quickwit - [ ] Meilisearch - [ ] Sneller diff --git a/doris/benchmark.sh b/doris/benchmark.sh new file mode 100755 index 0000000..d7522a8 --- /dev/null +++ b/doris/benchmark.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# If you change something in this file, please change also in starrocks/benchmark.sh. + +# Check if the required arguments are provided +if [[ $# -lt 3 ]]; then + echo "Usage: $0 DB_NAME RESULT_FILE_RUNTIMES QUERIES_FILE" + exit 1 +fi + +# Arguments +DB_NAME="$1" +RESULT_FILE_RUNTIMES="$2" +QUERIES_FILE="$3" + +# File that captures the raw output of run_queries.sh +QUERY_LOG_FILE="query_log.txt" + +# Print the database name +echo "Running queries on database: $DB_NAME" + +# Run queries and log the output +./run_queries.sh "$DB_NAME" "$QUERIES_FILE" 2>&1 | tee "$QUERY_LOG_FILE" + +# Extract the timings and group them three per row as "[t1,t2,t3]," +RESULT=$(grep -oP 'Response time: \d+\.\d+ s' "$QUERY_LOG_FILE" | sed -r -e 's/Response time: ([0-9]+\.[0-9]+) s/\1/' | \ +awk '{ if (i % 3 == 0) { printf "[" }; printf "%s", $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }') + +# Output the result +if [[ -n "$RESULT_FILE_RUNTIMES" ]]; then + echo "$RESULT" > "$RESULT_FILE_RUNTIMES" + echo "Result written to $RESULT_FILE_RUNTIMES" +else + echo "$RESULT" +fi diff --git a/doris/count.sh b/doris/count.sh new file mode 100755 index 0000000..12bca04 --- /dev/null +++ b/doris/count.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# If you change something in this file, please change also in starrocks/count.sh.
+ +# Check if the required arguments are provided +if [[ $# -lt 2 ]]; then + echo "Usage: $0 DB_NAME TABLE_NAME" + exit 1 +fi + +# Arguments +DB_NAME="$1" +TABLE_NAME="$2" + +mysql -P 9030 -h 127.0.0.1 -u root "$DB_NAME" -e "SELECT count() FROM $TABLE_NAME;" diff --git a/doris/create_and_load.sh b/doris/create_and_load.sh new file mode 100755 index 0000000..a068b8e --- /dev/null +++ b/doris/create_and_load.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 6 ]]; then + echo "Usage: $0 DB_NAME TABLE_NAME DATA_DIRECTORY NUM_FILES SUCCESS_LOG ERROR_LOG" + exit 1 +fi + +# Arguments +DB_NAME="$1" +TABLE_NAME="$2" +DATA_DIRECTORY="$3" +NUM_FILES="$4" +SUCCESS_LOG="$5" +ERROR_LOG="$6" + +# Validate arguments +[[ ! -d "$DATA_DIRECTORY" ]] && { echo "Error: Data directory '$DATA_DIRECTORY' does not exist."; exit 1; } +[[ ! "$NUM_FILES" =~ ^[0-9]+$ ]] && { echo "Error: NUM_FILES must be a positive integer."; exit 1; } + + +echo "Create database" +mysql -P 9030 -h 127.0.0.1 -u root -e "CREATE DATABASE IF NOT EXISTS $DB_NAME" || exit 1 + +echo "Execute DDL" +mysql -P 9030 -h 127.0.0.1 -u root "$DB_NAME" < "ddl.sql" || exit 1 + +echo "Load data" +./load_data.sh "$DATA_DIRECTORY" "$DB_NAME" "$TABLE_NAME" "$NUM_FILES" "$SUCCESS_LOG" "$ERROR_LOG" + +echo "Sleep 120 sec to collect data size" +sleep 120s diff --git a/doris/ddl.sql b/doris/ddl.sql new file mode 100644 index 0000000..4997a90 --- /dev/null +++ b/doris/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE bluesky ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `data` variant NOT NULL +) +DISTRIBUTED BY HASH(id) BUCKETS 32 +PROPERTIES ( + "replication_num"="1" +); diff --git a/doris/drop_table.sh b/doris/drop_table.sh new file mode 100755 index 0000000..1983438 --- /dev/null +++ b/doris/drop_table.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# If you change something in this file, please change also in starrocks/drop_table.sh.
+ +# Check if the required arguments are provided +if [[ $# -lt 2 ]]; then + echo "Usage: $0 DB_NAME TABLE_NAME" + exit 1 +fi + +DB_NAME="$1" +TABLE_NAME="$2" + +echo "Dropping table: $DB_NAME.$TABLE_NAME" +mysql -P 9030 -h 127.0.0.1 -u root "$DB_NAME" -e "DROP TABLE IF EXISTS $TABLE_NAME" diff --git a/doris/install.sh b/doris/install.sh new file mode 100755 index 0000000..3b8015a --- /dev/null +++ b/doris/install.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +wget "https://apache-doris-releases.oss-accelerate.aliyuncs.com/${DORIS_FULL_NAME:?DORIS_FULL_NAME must be set}.tar.gz" +mkdir -p "${DORIS_FULL_NAME}" +tar -xvf "${DORIS_FULL_NAME}.tar.gz" --strip-components 1 -C "${DORIS_FULL_NAME}" + +sudo apt-get update +sudo apt-get install -y mysql-client openjdk-17-jre-headless # somehow _EXACTLY_ v17 is needed diff --git a/doris/load_data.sh b/doris/load_data.sh new file mode 100755 index 0000000..df28b5b --- /dev/null +++ b/doris/load_data.sh @@ -0,0 +1,75 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 6 ]]; then + echo "Usage: $0 DATA_DIRECTORY DB_NAME TABLE_NAME MAX_FILES SUCCESS_LOG ERROR_LOG" + exit 1 +fi + + +# Arguments +DATA_DIRECTORY="$1" +DB_NAME="$2" +TABLE_NAME="$3" +MAX_FILES="$4" +SUCCESS_LOG="$5" +ERROR_LOG="$6" + +# Validate arguments +[[ ! -d "$DATA_DIRECTORY" ]] && { echo "Error: Data directory '$DATA_DIRECTORY' does not exist."; exit 1; } +[[ ! "$MAX_FILES" =~ ^[0-9]+$ ]] && { echo "Error: MAX_FILES must be a positive integer."; exit 1; } + +# Create a temporary directory for uncompressed files +TEMP_DIR=$(mktemp -d /var/tmp/json_files.XXXXXX) +trap 'rm -rf "$TEMP_DIR"' EXIT # Cleanup temp directory on script exit + +# Load data +counter=0 +start=0 +for file in $(ls "$DATA_DIRECTORY"/*.json.gz | head -n "$MAX_FILES"); do + echo "Processing file: $file" + num=$(basename "$file" | sed -n 's/[^0-9]*\([0-9]\+\).*/\1/p') # number from the file name only, not from digits in the directory path + if [ "$num" -le "$start" ]; then + continue + fi + + # Uncompress the file into the TEMP_DIR + uncompressed_file="$TEMP_DIR/$(basename "${file%.gz}")" + gunzip -c "$file" > "$uncompressed_file" + + if [[ $?
-ne 0 ]]; then + echo "Error: Failed to uncompress $file" >> "$ERROR_LOG" + continue + fi + MAX_ATTEMPT=10 + attempt=0 + while [ $attempt -lt $MAX_ATTEMPT ] + do + # Attempt the import; the body goes to a file in the private TEMP_DIR so it is fully written before it is read + http_code=$(curl -s -w "%{http_code}" -o "$TEMP_DIR/curl_body" --location-trusted -u root: -H "max_filter_ratio: 0.1" -H "Expect:100-continue" -H "columns: data" -T "$uncompressed_file" -XPUT http://127.0.0.1:8030/api/"$DB_NAME"/"$TABLE_NAME"/_stream_load) + response_body="$(cat "$TEMP_DIR/curl_body")" + response_status="$(jq -r '.Status' < "$TEMP_DIR/curl_body")" + echo "$response_status" + if [[ "$http_code" -ge 200 && "$http_code" -lt 300 ]]; then + if [ "$response_status" = "Success" ] + then + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Successfully imported $file. Response: $response_body" >> "$SUCCESS_LOG" + rm -f "$uncompressed_file" # Delete the uncompressed file after successful processing + attempt=$((MAX_ATTEMPT)) + else + echo "[$(date '+%Y-%m-%d %H:%M:%S')] $attempt attempt failed for $file with status code $http_code. Response: $response_body" >> "$ERROR_LOG" + attempt=$((attempt + 1)) + sleep 2 + fi + else + echo "[$(date '+%Y-%m-%d %H:%M:%S')] $attempt attempt failed for $file with status code $http_code. Response: $response_body" >> "$ERROR_LOG" + attempt=$((attempt + 1)) + sleep 2 + fi + done + + counter=$((counter + 1)) + if [[ $counter -ge $MAX_FILES ]]; then + break + fi +done diff --git a/doris/main.sh b/doris/main.sh new file mode 100755 index 0000000..59082e9 --- /dev/null +++ b/doris/main.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +# If you change something in this file, please change also in starrocks/main.sh.
+ +export DORIS_FULL_NAME="apache-doris-3.0.5-bin-x64" + +DEFAULT_CHOICE=ask +DEFAULT_DATA_DIRECTORY=~/data/bluesky + +# Allow the user to optionally provide the scale factor ("choice") as an argument +CHOICE="${1:-$DEFAULT_CHOICE}" + +# Allow the user to optionally provide the data directory as an argument +DATA_DIRECTORY="${2:-$DEFAULT_DATA_DIRECTORY}" + +# Define success and error log files +SUCCESS_LOG="${3:-success.log}" +ERROR_LOG="${4:-error.log}" + +# Define prefix for output files +OUTPUT_PREFIX="${5:-_m6i.8xlarge}" + +# Check if the directory exists +if [[ ! -d "$DATA_DIRECTORY" ]]; then + echo "Error: Data directory '$DATA_DIRECTORY' does not exist." + exit 1 +fi + +if [ "$CHOICE" = "ask" ]; then + echo "Select the dataset size to benchmark:" + echo "1) 1m (default)" + echo "2) 10m" + echo "3) 100m" + echo "4) 1000m" + echo "5) all" + read -r -p "Enter the number corresponding to your choice: " CHOICE +fi; + +./install.sh +./start.sh + +benchmark() { + local size=$1 file_count + # Check DATA_DIRECTORY contains the required number of *.json.gz files (the only files load_data.sh reads) + file_count=$(find "$DATA_DIRECTORY" -maxdepth 1 -type f -name '*.json.gz' | wc -l) + if (( file_count < size )); then + echo "Error: Not enough files in '$DATA_DIRECTORY'. Required: $size, Found: $file_count."
+ exit 1 + fi + ./create_and_load.sh "bluesky_${size}m" bluesky "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG" + ./total_size.sh "bluesky_${size}m" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m.total_size" + ./count.sh "bluesky_${size}m" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m.count" + ./benchmark.sh "bluesky_${size}m" "${OUTPUT_PREFIX}_bluesky_${size}m.results_runtime" "queries_default.sql" + ./drop_table.sh "bluesky_${size}m" bluesky +} + +case "$CHOICE" in + 2) + benchmark 10 + ;; + 3) + benchmark 100 + ;; + 4) + benchmark 1000 + ;; + 5) + benchmark 1 + benchmark 10 + benchmark 100 + benchmark 1000 + ;; + *) + benchmark 1 + ;; +esac + +./stop.sh +./uninstall.sh diff --git a/doris/queries_default.sql b/doris/queries_default.sql new file mode 100644 index 0000000..3885734 --- /dev/null +++ b/doris/queries_default.sql @@ -0,0 +1,5 @@ +SELECT cast(data['commit']['collection'] AS TEXT ) AS event, COUNT(*) AS count FROM bluesky GROUP BY event ORDER BY count DESC; +SELECT cast(data['commit']['collection'] AS TEXT ) AS event, COUNT(*) AS count, COUNT(DISTINCT cast(data['did'] AS TEXT )) AS users FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' GROUP BY event ORDER BY count DESC; +SELECT cast(data['commit']['collection'] AS TEXT ) AS event, HOUR(from_microsecond(CAST(data['time_us'] AS BIGINT))) AS hour_of_day, COUNT(*) AS count FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' AND cast(data['commit']['collection'] AS TEXT ) IN ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like') GROUP BY event, hour_of_day ORDER BY hour_of_day, event; +SELECT cast(data['did'] AS TEXT ) AS user_id, MIN(from_microsecond(CAST(data['time_us'] AS BIGINT))) AS first_post_ts FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' AND cast(data['commit']['collection'] AS TEXT ) =
'app.bsky.feed.post' GROUP BY user_id ORDER BY first_post_ts ASC LIMIT 3; +SELECT cast(data['did'] AS TEXT ) AS user_id, MILLISECONDS_DIFF(MAX(from_microsecond(CAST(data['time_us'] AS BIGINT))),MIN(from_microsecond(CAST(data['time_us'] AS BIGINT)))) AS activity_span FROM bluesky WHERE cast(data['kind'] AS TEXT ) = 'commit' AND cast(data['commit']['operation'] AS TEXT ) = 'create' AND cast(data['commit']['collection'] AS TEXT ) = 'app.bsky.feed.post' GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; diff --git a/doris/queries_materialized.sql b/doris/queries_materialized.sql new file mode 100644 index 0000000..54cd9bc --- /dev/null +++ b/doris/queries_materialized.sql @@ -0,0 +1,5 @@ +SELECT collection AS event, COUNT(*) AS count FROM bluesky GROUP BY event ORDER BY count DESC; +SELECT collection AS event, COUNT(*) AS count, COUNT(DISTINCT did) AS users FROM bluesky WHERE kind = 'commit' AND operation = 'create' GROUP BY event ORDER BY count DESC; +SELECT collection AS event, HOUR(time) AS hour_of_day, COUNT(*) AS count FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection IN ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like') GROUP BY event, hour_of_day ORDER BY hour_of_day, event; +SELECT did AS user_id, MIN(time) AS first_post_ts FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection = 'app.bsky.feed.post' GROUP BY user_id ORDER BY first_post_ts ASC LIMIT 3; +SELECT did AS user_id, MILLISECONDS_DIFF(MAX(time),MIN(time)) AS activity_span FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection = 'app.bsky.feed.post' GROUP BY user_id ORDER BY activity_span DESC LIMIT 3; diff --git a/doris/results/_query_results/_m6i.8xlarge_bluesky_1m.query_results b/doris/results/_query_results/_m6i.8xlarge_bluesky_1m.query_results new file mode 100644 index 0000000..1784b55 --- /dev/null +++ b/doris/results/_query_results/_m6i.8xlarge_bluesky_1m.query_results @@ -0,0 +1,71 @@ 
+------------------------------------------------------------------------------------------------------------------------ +Result for query Q1: ++----------------------------+--------+ +| event | count | ++----------------------------+--------+ +| app.bsky.feed.like | 448944 | +| app.bsky.graph.follow | 360374 | +| app.bsky.feed.post | 90816 | +| app.bsky.feed.repost | 58540 | +| app.bsky.graph.block | 14040 | +| app.bsky.actor.profile | 11762 | +| app.bsky.graph.listitem | 8103 | +| NULL | 5328 | +| app.bsky.graph.listblock | 895 | +| app.bsky.graph.starterpack | 405 | +| app.bsky.graph.list | 356 | +| app.bsky.feed.threadgate | 255 | +| app.bsky.feed.postgate | 104 | +| app.bsky.feed.generator | 74 | +| app.bsky.labeler.service | 4 | ++----------------------------+--------+ + +------------------------------------------------------------------------------------------------------------------------ +Result for query Q2: ++----------------------------+--------+--------+ +| event | count | users | ++----------------------------+--------+--------+ +| app.bsky.feed.like | 444523 | 117617 | +| app.bsky.graph.follow | 337978 | 63957 | +| app.bsky.feed.post | 86812 | 50464 | +| app.bsky.feed.repost | 56993 | 26581 | +| app.bsky.graph.block | 13838 | 5785 | +| app.bsky.graph.listitem | 7568 | 1078 | +| app.bsky.actor.profile | 5337 | 5337 | +| app.bsky.graph.listblock | 860 | 449 | +| app.bsky.graph.list | 259 | 218 | +| app.bsky.feed.threadgate | 228 | 196 | +| app.bsky.graph.starterpack | 104 | 101 | +| app.bsky.feed.postgate | 101 | 82 | +| app.bsky.feed.generator | 10 | 9 | ++----------------------------+--------+--------+ + +------------------------------------------------------------------------------------------------------------------------ +Result for query Q3: ++----------------------+-------------+--------+ +| event | hour_of_day | count | ++----------------------+-------------+--------+ +| app.bsky.feed.like | 16 | 444523 | +| app.bsky.feed.post | 16 | 86812 | 
+| app.bsky.feed.repost | 16 | 56993 | ++----------------------+-------------+--------+ + +------------------------------------------------------------------------------------------------------------------------ +Result for query Q4: ++----------------------------------+----------------------------+ +| user_id | first_post_ts | ++----------------------------------+----------------------------+ +| did:plc:yj3sjq3blzpynh27cumnp5ks | 2024-11-21 16:25:49.000167 | +| did:plc:l5o3qjrmfztir54cpwlv2eme | 2024-11-21 16:25:49.001905 | +| did:plc:s4bwqchfzm6gjqfeb6mexgbu | 2024-11-21 16:25:49.003907 | ++----------------------------------+----------------------------+ + +------------------------------------------------------------------------------------------------------------------------ +Result for query Q5: ++----------------------------------+---------------+ +| user_id | activity_span | ++----------------------------------+---------------+ +| did:plc:tsyymlun4eqjuw7hqrhmwagd | 813006 | +| did:plc:3ug235sfy2pz7cawmpsftb65 | 811602 | +| did:plc:doxhhgtxqiv47tmcovpbcqai | 811404 | ++----------------------------------+---------------+ diff --git a/doris/results/m6i.8xlarge_bluesky_1000m.json b/doris/results/m6i.8xlarge_bluesky_1000m.json new file mode 100644 index 0000000..e076f47 --- /dev/null +++ b/doris/results/m6i.8xlarge_bluesky_1000m.json @@ -0,0 +1,21 @@ +{ + "system": "Apache Doris", + "version": "doris-3.0.5-rc01-e277cfb83f", + "os": "Ubuntu 24.04", + "date": "2025-05-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 1000000000, + "num_loaded_documents": 999999994, + "total_size": 214623810748, + "data_size": 214623810748, + "result": [ + [4.24,1.67,1.66], + [222.33,11.01,10.16], + [27.82,7.47,7.45], + [235.20,6.41,6.07], + [234.71,6.37,5.78] + ] +} diff --git a/doris/results/m6i.8xlarge_bluesky_100m.json b/doris/results/m6i.8xlarge_bluesky_100m.json new file mode 100644 index 0000000..f6c3d06 --- 
/dev/null +++ b/doris/results/m6i.8xlarge_bluesky_100m.json @@ -0,0 +1,21 @@ +{ + "system": "Apache Doris", + "version": "doris-3.0.5-rc01-e277cfb83f", + "os": "Ubuntu 24.04", + "date": "2025-05-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 100000000, + "num_loaded_documents": 100000000, + "total_size": 21304111530, + "data_size": 21304111530, + "result": [ + [0.23,0.19,0.19], + [20.94,1.01,1.04], + [1.21,0.74,0.75], + [0.54,0.51,0.55], + [0.57,0.56,0.53] + ] +} diff --git a/doris/results/m6i.8xlarge_bluesky_10m.json b/doris/results/m6i.8xlarge_bluesky_10m.json new file mode 100644 index 0000000..f6523ed --- /dev/null +++ b/doris/results/m6i.8xlarge_bluesky_10m.json @@ -0,0 +1,21 @@ +{ + "system": "Apache Doris", + "version": "doris-3.0.5-rc01-e277cfb83f", + "os": "Ubuntu 24.04", + "date": "2025-05-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 10000000, + "num_loaded_documents": 10000000, + "total_size": 2170032226, + "data_size": 2170032226, + "result": [ + [1.05,0.04,0.05], + [1.33,0.15,0.15], + [0.11,0.11,0.11], + [0.08,0.08,0.08], + [0.09,0.08,0.08] + ] +} diff --git a/doris/results/m6i.8xlarge_bluesky_1m.json b/doris/results/m6i.8xlarge_bluesky_1m.json new file mode 100644 index 0000000..f633513 --- /dev/null +++ b/doris/results/m6i.8xlarge_bluesky_1m.json @@ -0,0 +1,21 @@ +{ + "system": "Apache Doris", + "version": "doris-3.0.5-rc01-e277cfb83f", + "os": "Ubuntu 24.04", + "date": "2025-05-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 1000000, + "num_loaded_documents": 1000000, + "total_size": 207785820, + "data_size": 207785820, + "result": [ + [0.05,0.02,0.03], + [0.09,0.04,0.05], + [0.06,0.04,0.04], + [0.04,0.03,0.05], + [0.03,0.04,0.03] + ] +} diff --git a/doris/run_queries.sh b/doris/run_queries.sh new file mode 100755 index 0000000..b04c6d0 --- /dev/null +++ 
b/doris/run_queries.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# If you change something in this file, please change also in starrocks/run_queries.sh. + +# Check if the required arguments are provided +if [[ $# -lt 2 ]]; then + echo "Usage: $0 DB_NAME QUERIES_FILE" + exit 1 +fi + +# Arguments +DB_NAME="$1" +QUERIES_FILE="$2" + +TRIES=3 + +mysql -P 9030 -h 127.0.0.1 -u root "$DB_NAME" -e "set global parallel_pipeline_task_num=32;" +mysql -P 9030 -h 127.0.0.1 -u root "$DB_NAME" -e "set global enable_parallel_scan=false;" + +while read -r query || [[ -n "$query" ]]; do + + # Clear the Linux file system cache + echo "Clearing file system cache..." + sync + echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + echo "File system cache cleared." + + # Print the query + echo "Running query: $query" + + # Execute the query multiple times + for i in $(seq 1 "$TRIES"); do + RESP=$(mysql -vvv -h127.1 -P9030 -uroot "$DB_NAME" -e "$query" | perl -nle 'if (/\((?:(\d+) min )?(\d+\.\d+) sec\)/) { $t = ($1 || 0) * 60 + $2; printf "%.2f\n", $t }' ||:) + echo "Response time: ${RESP} s" + done; +done < "$QUERIES_FILE" diff --git a/doris/start.sh b/doris/start.sh new file mode 100755 index 0000000..e4fb3cd --- /dev/null +++ b/doris/start.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +export JAVA_HOME="/usr/lib/jvm/java-17-openjdk-amd64" +sudo sysctl -w vm.max_map_count=2000000 +ulimit -n 655350 # must run in this shell so the BE/FE processes started below inherit the limit + +"${DORIS_FULL_NAME}"/be/bin/start_be.sh --daemon +"${DORIS_FULL_NAME}"/fe/bin/start_fe.sh --daemon + +echo "Sleep 30 sec to wait doris start" +sleep 30s + +mysql -P 9030 -h 127.0.0.1 -u root -e "ALTER SYSTEM ADD BACKEND \"127.0.0.1:9050\";" + +echo "Sleep 10 sec to wait frontend are connected to backend" +sleep 10s diff --git a/doris/stop.sh b/doris/stop.sh new file mode 100755 index 0000000..513f5ad --- /dev/null +++ b/doris/stop.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +"${DORIS_FULL_NAME}"/be/bin/stop_be.sh +"${DORIS_FULL_NAME}"/fe/bin/stop_fe.sh diff --git a/doris/total_size.sh b/doris/total_size.sh new file mode 100755 index 0000000..f7b5a85 --- /dev/null
+++ b/doris/total_size.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 2 ]]; then + echo "Usage: $0 DB_NAME TABLE_NAME" + exit 1 +fi + +# Arguments +DB_NAME="$1" +TABLE_NAME="$2" + +mysql -P 9030 -h 127.0.0.1 -u root "$DB_NAME" -e "ANALYZE TABLE $TABLE_NAME WITH SYNC" +mysql -P 9030 -h 127.0.0.1 -u root "$DB_NAME" -e "SHOW DATA FROM $TABLE_NAME" diff --git a/doris/uninstall.sh b/doris/uninstall.sh new file mode 100755 index 0000000..89a5c4b --- /dev/null +++ b/doris/uninstall.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +sudo apt-get remove -y mysql-client openjdk-17-jre-headless + +rm -rf -- "${DORIS_FULL_NAME:?}" "${DORIS_FULL_NAME}.tar.gz" # :? aborts instead of expanding to an empty path; also drop the downloaded archive diff --git a/starrocks/benchmark.sh b/starrocks/benchmark.sh index dd8834f..67414a2 100755 --- a/starrocks/benchmark.sh +++ b/starrocks/benchmark.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in doris/benchmark.sh. + # Check if the required arguments are provided if [[ $# -lt 3 ]]; then echo "Usage: $0 " diff --git a/starrocks/count.sh b/starrocks/count.sh index ff30fca..bb27457 100755 --- a/starrocks/count.sh +++ b/starrocks/count.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in doris/count.sh. + # Check if the required arguments are provided if [[ $# -lt 2 ]]; then echo "Usage: $0 " diff --git a/starrocks/create_and_load.sh b/starrocks/create_and_load.sh index 15fa017..2db7d1d 100755 --- a/starrocks/create_and_load.sh +++ b/starrocks/create_and_load.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in doris/create_and_load.sh. + # Check if the required arguments are provided if [[ $# -lt 6 ]]; then echo "Usage: $0 " diff --git a/starrocks/drop_table.sh b/starrocks/drop_table.sh index 6e20078..46d041f 100755 --- a/starrocks/drop_table.sh +++ b/starrocks/drop_table.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in doris/drop_table.sh.
+ # Check if the required arguments are provided if [[ $# -lt 2 ]]; then echo "Usage: $0 " diff --git a/starrocks/main.sh b/starrocks/main.sh index cb8c8d8..5e24dfe 100755 --- a/starrocks/main.sh +++ b/starrocks/main.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in doris/main.sh. + DEFAULT_CHOICE=ask DEFAULT_DATA_DIRECTORY=~/data/bluesky diff --git a/starrocks/run_queries.sh b/starrocks/run_queries.sh index 019abe9..990a1c7 100755 --- a/starrocks/run_queries.sh +++ b/starrocks/run_queries.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in doris/run_queries.sh. + # Check if the required arguments are provided if [[ $# -lt 1 ]]; then echo "Usage: $0 " diff --git a/starrocks/total_size.sh b/starrocks/total_size.sh index b0700d0..6d8fc05 100755 --- a/starrocks/total_size.sh +++ b/starrocks/total_size.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please change also in doris/total_size.sh. + # Check if the required arguments are provided if [[ $# -lt 2 ]]; then echo "Usage: $0 "