Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clickhouse/ddl.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
CREATE TABLE bluesky
(
`data` JSON(
max_dynamic_paths = 0,
max_dynamic_paths = 0, -- will become the default for large uses in future
kind LowCardinality(String),
commit.operation LowCardinality(String),
commit.collection LowCardinality(String),
Expand Down
16 changes: 7 additions & 9 deletions mongodb/create_and_load.sh
Original file line number Diff line number Diff line change
@@ -1,29 +1,27 @@
#!/bin/bash

# Check if the required arguments are provided
if [[ $# -lt 7 ]]; then
echo "Usage: $0 <DB_NAME> <COLLECTION_NAME> <DDL_FILE> <DATA_DIRECTORY> <NUM_FILES> <SUCCESS_LOG> <ERROR_LOG>"
if [[ $# -lt 6 ]]; then
echo "Usage: $0 <DB_NAME> <COLLECTION_NAME> <DATA_DIRECTORY> <NUM_FILES> <SUCCESS_LOG> <ERROR_LOG>"
exit 1
fi

# Arguments
DB_NAME="$1"
COLLECTION_NAME="$2"
DDL_FILE="$3"
DATA_DIRECTORY="$4"
NUM_FILES="$5"
SUCCESS_LOG="$6"
ERROR_LOG="$7"
DATA_DIRECTORY="$3"
NUM_FILES="$4"
SUCCESS_LOG="$5"
ERROR_LOG="$6"

# Validate arguments
[[ ! -f "$DDL_FILE" ]] && { echo "Error: DDL file '$DDL_FILE' does not exist."; exit 1; }
[[ ! -d "$DATA_DIRECTORY" ]] && { echo "Error: Data directory '$DATA_DIRECTORY' does not exist."; exit 1; }
[[ ! "$NUM_FILES" =~ ^[0-9]+$ ]] && { echo "Error: NUM_FILES must be a positive integer."; exit 1; }

# Create database and execute DDL file
mongosh --quiet --eval "
db = db.getSiblingDB('$DB_NAME');
load('$DDL_FILE');
load('ddl.js');
"

echo "Loading data"
Expand Down
File renamed without changes.
6 changes: 0 additions & 6 deletions mongodb/ddl_snappy.js

This file was deleted.

44 changes: 18 additions & 26 deletions mongodb/main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,50 +36,42 @@ fi

benchmark() {
local size=$1
local compression=$2
local compression=x
# Check DATA_DIRECTORY contains the required number of files to run the benchmark
file_count=$(find "$DATA_DIRECTORY" -type f | wc -l)
if (( file_count < size )); then
echo "Error: Not enough files in '$DATA_DIRECTORY'. Required: $size, Found: $file_count."
exit 1
fi
./create_and_load.sh "bluesky_${size}m_${compression}" bluesky "ddl_${compression}.js" "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG"
./total_size.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.total_size"
./data_size.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.data_size"
./index_size.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.index_size"
./count.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.count"
#./query_results.sh "bluesky_${size}m_${compression}" | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.query_results"
./index_usage.sh "bluesky_${size}m_${compression}" | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.index_usage"
./benchmark.sh "bluesky_${size}m_${compression}" "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.results_runtime"
./drop_table.sh "bluesky_${size}m_${compression}"
./create_and_load.sh "bluesky_${size}m" bluesky "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG"
./total_size.sh "bluesky_${size}m" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m.total_size"
./data_size.sh "bluesky_${size}m" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m.data_size"
./index_size.sh "bluesky_${size}m" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m.index_size"
./count.sh "bluesky_${size}m" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m.count"
#./query_results.sh "bluesky_${size}m" | tee "${OUTPUT_PREFIX}_bluesky_${size}m.query_results"
./index_usage.sh "bluesky_${size}m" | tee "${OUTPUT_PREFIX}_bluesky_${size}m.index_usage"
./benchmark.sh "bluesky_${size}m" "${OUTPUT_PREFIX}_bluesky_${size}m.results_runtime"
./drop_table.sh "bluesky_${size}m"
}

case $CHOICE in
2)
benchmark 10 snappy
benchmark 10 zstd
benchmark 10
;;
3)
benchmark 100 snappy
benchmark 100 zstd
benchmark 100
;;
4)
benchmark 1000 snappy
benchmark 1000 zstd
benchmark 1000
;;
5)
benchmark 1 snappy
benchmark 1 zstd
benchmark 10 snappy
benchmark 10 zstd
benchmark 100 snappy
benchmark 100 zstd
benchmark 1000 snappy
benchmark 1000 zstd
benchmark 1
benchmark 10
benchmark 100
benchmark 1000
;;
*)
benchmark 1 snappy
benchmark 1 zstd
benchmark 1
;;
esac

Expand Down
Loading