From 11fadfb9b1752cf7323842fc40010dc69056c513 Mon Sep 17 00:00:00 2001 From: saurabhojha Date: Sun, 23 Mar 2025 03:02:29 +0530 Subject: [PATCH 01/14] temp commit --- ferretdb/benchmark.sh | 32 +++++++ ferretdb/count.sh | 31 +++++++ ferretdb/create_and_load.sh | 31 +++++++ ferretdb/data_size.sh | 26 ++++++ ferretdb/ddl_snappy.js | 6 ++ ferretdb/ddl_zstd.js | 6 ++ ferretdb/drop_table.sh | 13 +++ ferretdb/index_size.sh | 26 ++++++ ferretdb/index_usage.sh | 56 ++++++++++++ ferretdb/install.sh | 24 ++++++ ferretdb/load_data.sh | 76 ++++++++++++++++ ferretdb/main.sh | 86 ++++++++++++++++++ ferretdb/queries.js | 5 ++ ferretdb/queries_formatted.js | 158 ++++++++++++++++++++++++++++++++++ ferretdb/query_results.sh | 44 ++++++++++ ferretdb/run_queries.sh | 80 +++++++++++++++++ ferretdb/total_size.sh | 26 ++++++ ferretdb/uninstall.sh | 4 + 18 files changed, 730 insertions(+) create mode 100755 ferretdb/benchmark.sh create mode 100755 ferretdb/count.sh create mode 100755 ferretdb/create_and_load.sh create mode 100755 ferretdb/data_size.sh create mode 100644 ferretdb/ddl_snappy.js create mode 100644 ferretdb/ddl_zstd.js create mode 100755 ferretdb/drop_table.sh create mode 100755 ferretdb/index_size.sh create mode 100755 ferretdb/index_usage.sh create mode 100755 ferretdb/install.sh create mode 100755 ferretdb/load_data.sh create mode 100755 ferretdb/main.sh create mode 100644 ferretdb/queries.js create mode 100644 ferretdb/queries_formatted.js create mode 100755 ferretdb/query_results.sh create mode 100755 ferretdb/run_queries.sh create mode 100755 ferretdb/total_size.sh create mode 100755 ferretdb/uninstall.sh diff --git a/ferretdb/benchmark.sh b/ferretdb/benchmark.sh new file mode 100755 index 0000000..3158efb --- /dev/null +++ b/ferretdb/benchmark.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 1 ]]; then + echo "Usage: $0 [RESULT_FILE]" + exit 1 +fi + +# Arguments +DB_NAME="$1" +RESULT_FILE="${2:-}" + +# Construct the query 
log file name using $DB_NAME +QUERY_LOG_FILE="_query_log_${DB_NAME}.txt" + +# Print the database name +echo "Running queries on database: $DB_NAME" + +# Run queries and log the output +./run_queries.sh "$DB_NAME" 2>&1 | tee "$QUERY_LOG_FILE" + +# Process the query log and prepare the result +RESULT=$(cat "$QUERY_LOG_FILE" | grep -oP 'Execution time: \d+ms' | sed -r 's/Execution time: ([0-9]+)/\1/' | \ +awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }') + +# Output the result +if [[ -n "$RESULT_FILE" ]]; then + echo "$RESULT" > "$RESULT_FILE" + echo "Result written to $RESULT_FILE" +else + echo "$RESULT" +fi \ No newline at end of file diff --git a/ferretdb/count.sh b/ferretdb/count.sh new file mode 100755 index 0000000..f5c0206 --- /dev/null +++ b/ferretdb/count.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 2 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DATABASE_NAME="$1" +COLLECTION_NAME="$2" + +# Fetch the document count using mongosh +document_count=$(mongosh --quiet --eval " + const db = db.getSiblingDB('$DATABASE_NAME'); + const count = db.getCollection('$COLLECTION_NAME').stats().count + print(count); +") + +# Debugging information +echo "Database: $DATABASE_NAME" +echo "Collection: $COLLECTION_NAME" +echo "Document count: $document_count" + +# Print the result +if [[ -z "$document_count" ]]; then + echo "Error: Unable to fetch document count. Ensure the database and collection exist." 
+ exit 1 +else + echo $document_count +fi \ No newline at end of file diff --git a/ferretdb/create_and_load.sh b/ferretdb/create_and_load.sh new file mode 100755 index 0000000..222a989 --- /dev/null +++ b/ferretdb/create_and_load.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 7 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DB_NAME="$1" +COLLECTION_NAME="$2" +DDL_FILE="$3" +DATA_DIRECTORY="$4" +NUM_FILES="$5" +SUCCESS_LOG="$6" +ERROR_LOG="$7" + +# Validate arguments +[[ ! -f "$DDL_FILE" ]] && { echo "Error: DDL file '$DDL_FILE' does not exist."; exit 1; } +[[ ! -d "$DATA_DIRECTORY" ]] && { echo "Error: Data directory '$DATA_DIRECTORY' does not exist."; exit 1; } +[[ ! "$NUM_FILES" =~ ^[0-9]+$ ]] && { echo "Error: NUM_FILES must be a positive integer."; exit 1; } + +# Create database and execute DDL file +mongosh --quiet --eval " + db = db.getSiblingDB('$DB_NAME'); + load('$DDL_FILE'); +" + + +echo "Loading data" +./load_data.sh "$DATA_DIRECTORY" "$DB_NAME" "$COLLECTION_NAME" "$NUM_FILES" "$SUCCESS_LOG" "$ERROR_LOG" diff --git a/ferretdb/data_size.sh b/ferretdb/data_size.sh new file mode 100755 index 0000000..0b3d49a --- /dev/null +++ b/ferretdb/data_size.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 2 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DATABASE_NAME="$1" +COLLECTION_NAME="$2" + +# Fetch the totalSize using mongosh +total_size=$(mongosh --quiet --eval " + const db = db.getSiblingDB('$DATABASE_NAME'); + const stats = db.getCollection('$COLLECTION_NAME').stats(); + print(stats.storageSize); +") + +# Print the result +if [[ -z "$total_size" ]]; then + echo "Error: Unable to fetch totalSize. Ensure the database and collection exist." 
+ exit 1 +else + echo $total_size +fi \ No newline at end of file diff --git a/ferretdb/ddl_snappy.js b/ferretdb/ddl_snappy.js new file mode 100644 index 0000000..c4577d1 --- /dev/null +++ b/ferretdb/ddl_snappy.js @@ -0,0 +1,6 @@ +db.createCollection( + "bluesky", + { storageEngine: { wiredTiger: { configString: "block_compressor=snappy" } } } +); + +db.bluesky.createIndex({"kind": 1, "commit.operation": 1, "commit.collection": 1, "did": 1, "time_us": 1}); \ No newline at end of file diff --git a/ferretdb/ddl_zstd.js b/ferretdb/ddl_zstd.js new file mode 100644 index 0000000..ae5f59e --- /dev/null +++ b/ferretdb/ddl_zstd.js @@ -0,0 +1,6 @@ +db.createCollection( + "bluesky", + { storageEngine: { wiredTiger: { configString: "block_compressor=zstd" } } } +); + +db.bluesky.createIndex({"kind": 1, "commit.operation": 1, "commit.collection": 1, "did": 1, "time_us": 1}); \ No newline at end of file diff --git a/ferretdb/drop_table.sh b/ferretdb/drop_table.sh new file mode 100755 index 0000000..038a9b5 --- /dev/null +++ b/ferretdb/drop_table.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 1 ]]; then + echo "Usage: $0 " + exit 1 +fi + +DB_NAME="$1" + +echo "Dropping database: $DB_NAME" + +mongosh --eval "use $DB_NAME" --eval "db.dropDatabase()" diff --git a/ferretdb/index_size.sh b/ferretdb/index_size.sh new file mode 100755 index 0000000..b298d3a --- /dev/null +++ b/ferretdb/index_size.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 2 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DATABASE_NAME="$1" +COLLECTION_NAME="$2" + +# Fetch the totalSize using mongosh +total_size=$(mongosh --quiet --eval " + const db = db.getSiblingDB('$DATABASE_NAME'); + const stats = db.getCollection('$COLLECTION_NAME').stats(); + print(stats.totalIndexSize); +") + +# Print the result +if [[ -z "$total_size" ]]; then + echo "Error: Unable to fetch totalSize. 
Ensure the database and collection exist." + exit 1 +else + echo $total_size +fi \ No newline at end of file diff --git a/ferretdb/index_usage.sh b/ferretdb/index_usage.sh new file mode 100755 index 0000000..f2c2ad7 --- /dev/null +++ b/ferretdb/index_usage.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 1 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DB_NAME="$1" + +QUERY_NUM=1 + +# File containing MongoDB queries (replace 'queries.js' with your file) +QUERY_FILE="queries.js" + +# Check if the query file exists +if [[ ! -f "$QUERY_FILE" ]]; then + echo "Error: Query file '$QUERY_FILE' does not exist." + exit 1 +fi + +# Set the internalQueryPlannerGenerateCoveredWholeIndexScans parameter to true +echo "Setting internalQueryPlannerGenerateCoveredWholeIndexScans to true..." +mongosh --quiet --eval " + const result = db.adminCommand({ setParameter: 1, internalQueryPlannerGenerateCoveredWholeIndexScans: true }); + if (result.ok !== 1) { + print('Failed to set internalQueryPlannerGenerateCoveredWholeIndexScans: ' + JSON.stringify(result)); + quit(1); + } else { + print('Successfully set internalQueryPlannerGenerateCoveredWholeIndexScans to true'); + } +" + +cat "$QUERY_FILE" | while read -r query; do + + # Print the query number + echo "------------------------------------------------------------------------------------------------------------------------" + echo "Index usage for query Q$QUERY_NUM:" + echo + + # Modify the query to include the explain option inside the aggregate call + MODIFIED_QUERY=$(echo "$query" | sed 's/]);$/], { explain: "queryPlanner" });/') + + # Escape the modified query for safe passing to mongosh + ESCAPED_QUERY=$(echo "$MODIFIED_QUERY" | sed 's/\([\"\\]\)/\\\1/g' | sed 's/\$/\\$/g') + + mongosh --quiet --eval " + const db = db.getSiblingDB('$DB_NAME'); + const result = eval(\"$ESCAPED_QUERY\"); + printjson(result.stages[0].\$cursor.queryPlanner.winningPlan); + " + + # Increment the 
query number + QUERY_NUM=$((QUERY_NUM + 1)) +done; \ No newline at end of file diff --git a/ferretdb/install.sh b/ferretdb/install.sh new file mode 100755 index 0000000..d5ec0b3 --- /dev/null +++ b/ferretdb/install.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# https://docs.ferretdb.io/installation/ferretdb/deb/ + +sudo snap install docker + +# Run postgresql with documentdb as storage extension +docker run -d --name postgres \ + --platform linux/amd64 \ + --restart on-failure \ + -e POSTGRES_USER=username \ + -e POSTGRES_PASSWORD=password \ + -e POSTGRES_DB=postgres \ + -v pgdata:/var/lib/postgresql/data \ + ghcr.io/ferretdb/postgres-documentdb:17-0.102.0-ferretdb-2.0.0 + +# Run ferretdb +docker run -d --name ferretdb \ + --restart on-failure \ + --link postgres \ + -p 27017:27017 \ + -e FERRETDB_POSTGRESQL_URL=postgres://username:password@postgres:5432/postgres \ + ghcr.io/ferretdb/ferretdb:2.0.0 + diff --git a/ferretdb/load_data.sh b/ferretdb/load_data.sh new file mode 100755 index 0000000..7624047 --- /dev/null +++ b/ferretdb/load_data.sh @@ -0,0 +1,76 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 6 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DIRECTORY="$1" +DB_NAME="$2" +COLLECTION_NAME="$3" +MAX_FILES="$4" +SUCCESS_LOG="$5" +ERROR_LOG="$6" +MONGO_URI="mongodb://localhost:27017" # Replace with your MongoDB URI if necessary + +# Validate that MAX_FILES is a number +if ! [[ "$MAX_FILES" =~ ^[0-9]+$ ]]; then + echo "Error: must be a positive integer." + exit 1 +fi + +# Ensure the log files exist +touch "$SUCCESS_LOG" "$ERROR_LOG" + +# Create a temporary directory for uncompressed files +TEMP_DIR=$(mktemp -d /var/tmp/json_files.XXXXXX) +trap "rm -rf $TEMP_DIR" EXIT # Ensure cleanup on script exit + +# Counter to track processed files +counter=0 + +# Loop through each .json.gz file in the directory +for file in $(ls "$DIRECTORY"/*.json.gz 2>/dev/null | sort); do + if [[ -f "$file" ]]; then + echo "Processing $file..." 
+ counter=$((counter + 1)) + + # Uncompress the file into the TEMP_DIR + uncompressed_file="$TEMP_DIR/$(basename "${file%.gz}")" + gunzip -c "$file" > "$uncompressed_file" + + # Check if uncompression was successful + if [[ $? -ne 0 ]]; then + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Failed to uncompress $file." >> "$ERROR_LOG" + continue + fi + + # Import the uncompressed JSON file into MongoDB + mongoimport --uri "$MONGO_URI" --db "$DB_NAME" --collection "$COLLECTION_NAME" --file "$uncompressed_file" + import_status=$? + + # Check if the import was successful + if [[ $import_status -eq 0 ]]; then + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Successfully imported $uncompressed_file into MongoDB." >> "$SUCCESS_LOG" + else + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Failed to import $uncompressed_file into MongoDB." >> "$ERROR_LOG" + fi + + # Remove the uncompressed file after processing + rm -f "$uncompressed_file" + + # Stop processing if the max number of files is reached + if [[ $counter -ge $MAX_FILES ]]; then + echo "Processed maximum number of files: $MAX_FILES" + break + fi + fi +done + +if [[ $counter -eq 0 ]]; then + echo "No .json.gz files found in the directory." +fi + +echo "All files have been processed." \ No newline at end of file diff --git a/ferretdb/main.sh b/ferretdb/main.sh new file mode 100755 index 0000000..57d79ac --- /dev/null +++ b/ferretdb/main.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +DEFAULT_CHOICE=ask +DEFAULT_DATA_DIRECTORY=~/data/bluesky + +# Allow the user to optionally provide the scale factor ("choice") as an argument +CHOICE="${1:-$DEFAULT_CHOICE}" + +# Allow the user to optionally provide the data directory as an argument +DATA_DIRECTORY="${2:-$DEFAULT_DATA_DIRECTORY}" + +# Define success and error log files +SUCCESS_LOG="${3:-success.log}" +ERROR_LOG="${4:-error.log}" + +# Define prefix for output files +OUTPUT_PREFIX="${5:-_m6i.8xlarge}" + +# Check if the directory exists +if [[ ! 
-d "$DATA_DIRECTORY" ]]; then + echo "Error: Data directory '$DATA_DIRECTORY' does not exist." + exit 1 +fi + +if [ "$CHOICE" = "ask" ]; then + echo "Select the dataset size to benchmark:" + echo "1) 1m (default)" + echo "2) 10m" + echo "3) 100m" + echo "4) 1000m" + echo "5) all" + read -p "Enter the number corresponding to your choice: " CHOICE +fi + +./install.sh + +benchmark() { + local size=$1 + local compression=$2 + # Check DATA_DIRECTORY contains the required number of files to run the benchmark + file_count=$(find "$DATA_DIRECTORY" -type f | wc -l) + if (( file_count < size )); then + echo "Error: Not enough files in '$DATA_DIRECTORY'. Required: $size, Found: $file_count." + exit 1 + fi + ./create_and_load.sh "bluesky_${size}m_${compression}" bluesky "ddl_${compression}.js" "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG" + ./total_size.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.total_size" + ./data_size.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.data_size" + ./index_size.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.index_size" + ./count.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.count" + #./query_results.sh "bluesky_${size}m_${compression}" | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.query_results" + ./index_usage.sh "bluesky_${size}m_${compression}" | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.index_usage" + ./benchmark.sh "bluesky_${size}m_${compression}" "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.results_runtime" + ./drop_table.sh "bluesky_${size}m_${compression}" +} + +case $choice in + 2) + benchmark 10 snappy + benchmark 10 zstd + ;; + 3) + benchmark 100 snappy + benchmark 100 zstd + ;; + 4) + benchmark 1000 snappy + benchmark 1000 zstd + ;; + 5) + benchmark 1 snappy + benchmark 1 zstd + 
benchmark 10 snappy + benchmark 10 zstd + benchmark 100 snappy + benchmark 100 zstd + benchmark 1000 snappy + benchmark 1000 zstd + ;; + *) + benchmark 1 snappy + benchmark 1 zstd + ;; +esac + +./uninstall.sh diff --git a/ferretdb/queries.js b/ferretdb/queries.js new file mode 100644 index 0000000..b536c49 --- /dev/null +++ b/ferretdb/queries.js @@ -0,0 +1,5 @@ +db.bluesky.aggregate([ { $group: { _id: "$commit.collection", count: { $sum: 1 } } }, { $sort: { count: -1 } } ]); +db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create" } }, { $group: { _id: "$commit.collection", count: { $sum: 1 }, users: { $addToSet: "$did" } } }, { $project: { event: "$_id", count: 1, users: { $size: "$users" } } }, { $sort: { count: -1 } } ]); +db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create", "commit.collection": { $in: ["app.bsky.feed.post", "app.bsky.feed.repost", "app.bsky.feed.like"] } } }, { $project: { _id: 0, event: "$commit.collection", hour_of_day: { $hour: { $toDate: { $divide: ["$time_us", 1000] } } } } }, { $group: { _id: { event: "$event", hour_of_day: "$hour_of_day" }, count: { $sum: 1 } } }, { $sort: { "_id.hour_of_day": 1, "_id.event": 1 } } ]); +db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create", "commit.collection": "app.bsky.feed.post" } }, { $project: { _id: 0, user_id: "$did", timestamp: { $toDate: { $divide: ["$time_us", 1000] } } } }, { $group: { _id: "$user_id", first_post_ts: { $min: "$timestamp" } } }, { $sort: { first_post_ts: 1 } }, { $limit: 3 } ]); +db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create", "commit.collection": "app.bsky.feed.post" } }, { $project: { _id: 0, user_id: "$did", timestamp: { $toDate: { $divide: ["$time_us", 1000] } } } }, { $group: { _id: "$user_id", min_timestamp: { $min: "$timestamp" }, max_timestamp: { $max: "$timestamp" } } }, { $project: { activity_span: { $dateDiff: { startDate: "$min_timestamp", endDate: 
"$max_timestamp", unit: "millisecond" } } } }, { $sort: { activity_span: -1 } }, { $limit: 3 } ]); \ No newline at end of file diff --git a/ferretdb/queries_formatted.js b/ferretdb/queries_formatted.js new file mode 100644 index 0000000..616ed48 --- /dev/null +++ b/ferretdb/queries_formatted.js @@ -0,0 +1,158 @@ +// ------------------------------------------------------------------------------------------------------------------------ +// -- Q1 - Top event types +// --------------------------------------------------------------------------------------------------------------------- +db.bluesky.aggregate([ + { + $group: { + _id: "$commit.collection", + count: { $sum: 1 } + } + }, + { + $sort: { count: -1 } + } +]); + +// --------------------------------------------------------------------------------------------------------------------- +// -- Q2 - Top event types together with unique users per event type +// --------------------------------------------------------------------------------------------------------------------- +db.bluesky.aggregate([ + { + $match: { + "kind": "commit", + "commit.operation": "create" + } + }, + + { + $group: { + _id: "$commit.collection", + count: { $sum: 1 }, + users: { $addToSet: "$did" } + } + }, + { + $project: { + event: "$_id", + count: 1, + users: { $size: "$users" } + } + }, + { + $sort: { count: -1 } + } +]); + +// --------------------------------------------------------------------------------------------------------------------- +// -- Q3 - When do people use BlueSky +// --------------------------------------------------------------------------------------------------------------------- +db.bluesky.aggregate([ + { + $match: { + "kind": "commit", + "commit.operation": "create", + "commit.collection": { + $in: ["app.bsky.feed.post", "app.bsky.feed.repost", "app.bsky.feed.like"] + } + } + }, + { + $project: { + _id: 0, + event: "$commit.collection", + hour_of_day: { + $hour: { + $toDate: { $divide: ["$time_us", 1000] } + } + } 
+ } + }, + { + $group: { + _id: { event: "$event", hour_of_day: "$hour_of_day" }, + count: { $sum: 1 } + } + }, + { + $sort: { + "_id.hour_of_day": 1, + "_id.event": 1 + } + } +]); + +// --------------------------------------------------------------------------------------------------------------------- +// -- Q4 - top 3 post veterans +// --------------------------------------------------------------------------------------------------------------------- +db.bluesky.aggregate([ + { + $match: { + "kind": "commit", + "commit.operation": "create", + "commit.collection": "app.bsky.feed.post" + } + }, + { + $project: { + _id: 0, + user_id: "$did", + timestamp: { $toDate: { $divide: ["$time_us", 1000] } } + } + }, + { + $group: { + _id: "$user_id", + first_post_ts: { $min: "$timestamp" } + } + }, + { + $sort: { first_post_ts: 1 } + }, + { + $limit: 3 + } +]); + +// --------------------------------------------------------------------------------------------------------------------- +// -- Q5 - top 3 users with longest activity +// --------------------------------------------------------------------------------------------------------------------- +db.bluesky.aggregate([ + { + $match: { + "kind": "commit", + "commit.operation": "create", + "commit.collection": "app.bsky.feed.post" + } + }, + { + $project: { + _id: 0, + user_id: "$did", + timestamp: { $toDate: { $divide: ["$time_us", 1000] } } + } + }, + { + $group: { + _id: "$user_id", + min_timestamp: { $min: "$timestamp" }, + max_timestamp: { $max: "$timestamp" } + } + }, + { + $project: { + activity_span: { + $dateDiff: { + startDate: "$min_timestamp", + endDate: "$max_timestamp", + unit: "millisecond" + } + } + } + }, + { + $sort: { activity_span: -1 } + }, + { + $limit: 3 + } +]); diff --git a/ferretdb/query_results.sh b/ferretdb/query_results.sh new file mode 100755 index 0000000..8c7c247 --- /dev/null +++ b/ferretdb/query_results.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# Check if the required arguments are provided +if 
[[ $# -lt 1 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DB_NAME="$1" + +QUERY_NUM=1 + +# File containing MongoDB queries (replace 'queries.js' with your file) +QUERY_FILE="queries.js" + +# Check if the query file exists +if [[ ! -f "$QUERY_FILE" ]]; then + echo "Error: Query file '$QUERY_FILE' does not exist." + exit 1 +fi + +# Read and execute each query +cat "$QUERY_FILE" | while read -r query; do + + # Print the query + echo "------------------------------------------------------------------------------------------------------------------------" + echo "Result for query Q$QUERY_NUM:" + echo + + # Escape the query for safe passing to mongosh + ESCAPED_QUERY=$(echo "$query" | sed 's/\([\"\\]\)/\\\1/g' | sed 's/\$/\\$/g') + + mongosh --eval " + const db = db.getSiblingDB('$DB_NAME'); + const result = eval(\"$ESCAPED_QUERY\"); + printjson(result); + " + + + # Increment the query number + QUERY_NUM=$((QUERY_NUM + 1)) + +done \ No newline at end of file diff --git a/ferretdb/run_queries.sh b/ferretdb/run_queries.sh new file mode 100755 index 0000000..43c670a --- /dev/null +++ b/ferretdb/run_queries.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 1 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DB_NAME="$1" + +# Number of tries for each query +TRIES=3 + +# File containing MongoDB queries (replace 'queries.js' with your file) +QUERY_FILE="queries.js" + +# Check if the query file exists +if [[ ! -f "$QUERY_FILE" ]]; then + echo "Error: Query file '$QUERY_FILE' does not exist." + exit 1 +fi + +# Set the internalQueryMaxAddToSetBytes parameter to 1 GB +echo "Setting internalQueryMaxAddToSetBytes to 1 GB..." 
+mongosh --quiet --eval " + const result = db.adminCommand({ setParameter: 1, internalQueryMaxAddToSetBytes: 1073741824 }); + if (result.ok !== 1) { + print('Failed to set internalQueryMaxAddToSetBytes: ' + JSON.stringify(result)); + quit(1); + } else { + print('Successfully set internalQueryMaxAddToSetBytes to 1 GB'); + } +" + +# Set the internalQueryPlannerGenerateCoveredWholeIndexScans parameter to true +echo "Setting internalQueryPlannerGenerateCoveredWholeIndexScans to true..." +mongosh --quiet --eval " + const result = db.adminCommand({ setParameter: 1, internalQueryPlannerGenerateCoveredWholeIndexScans: true }); + if (result.ok !== 1) { + print('Failed to set internalQueryPlannerGenerateCoveredWholeIndexScans: ' + JSON.stringify(result)); + quit(1); + } else { + print('Successfully set internalQueryPlannerGenerateCoveredWholeIndexScans to true'); + } +" + +# Read and execute each query +cat "$QUERY_FILE" | while read -r query; do + + # Clear the Linux file system cache + echo "Clearing file system cache..." + sync + echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + echo "File system cache cleared." + + # Print the query + echo "Running query: $query" + + # Escape the query for safe passing to mongosh + ESCAPED_QUERY=$(echo "$query" | sed 's/\([\"\\]\)/\\\1/g' | sed 's/\$/\\$/g') + + # Execute the query multiple times + for i in $(seq 1 $TRIES); do + mongosh --quiet --eval " + const db = db.getSiblingDB('$DB_NAME'); + const start = new Date(); + const result = eval(\"$ESCAPED_QUERY\"); + // Force query execution -> When using commands like aggregate() or find(), + // the query is not fully executed until the data is actually fetched or processed. 
+ if (Array.isArray(result)) { + result.length; // Access the length to force evaluation for arrays + } else if (typeof result === 'object' && typeof result.toArray === 'function') { + result.toArray(); // Force execution for cursors + } + const end = new Date(); + print('Execution time: ' + (end.getTime() - start.getTime()) + 'ms'); + " + done +done \ No newline at end of file diff --git a/ferretdb/total_size.sh b/ferretdb/total_size.sh new file mode 100755 index 0000000..e05a6ca --- /dev/null +++ b/ferretdb/total_size.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Check if the required arguments are provided +if [[ $# -lt 2 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DATABASE_NAME="$1" +COLLECTION_NAME="$2" + +# Fetch the totalSize using mongosh +total_size=$(mongosh --quiet --eval " + const db = db.getSiblingDB('$DATABASE_NAME'); + const stats = db.getCollection('$COLLECTION_NAME').stats(); + print(stats.totalSize); +") + +# Print the result +if [[ -z "$total_size" ]]; then + echo "Error: Unable to fetch totalSize. Ensure the database and collection exist." 
+ exit 1 +else + echo $total_size +fi \ No newline at end of file diff --git a/ferretdb/uninstall.sh b/ferretdb/uninstall.sh new file mode 100755 index 0000000..8a4e8a0 --- /dev/null +++ b/ferretdb/uninstall.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +docker stop ferretdb +docker rm ferretdb From 40923c31a268a46e6548386ec54756a4e5960912 Mon Sep 17 00:00:00 2001 From: saurabhojha Date: Sun, 23 Mar 2025 05:19:51 +0530 Subject: [PATCH 02/14] Fix benchmarking --- ferretdb/index_usage.sh | 2 +- ferretdb/install.sh | 14 ++++++++++++-- ferretdb/uninstall.sh | 4 ++++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/ferretdb/index_usage.sh b/ferretdb/index_usage.sh index f2c2ad7..19676f8 100755 --- a/ferretdb/index_usage.sh +++ b/ferretdb/index_usage.sh @@ -48,7 +48,7 @@ cat "$QUERY_FILE" | while read -r query; do mongosh --quiet --eval " const db = db.getSiblingDB('$DB_NAME'); const result = eval(\"$ESCAPED_QUERY\"); - printjson(result.stages[0].\$cursor.queryPlanner.winningPlan); + printjson(result); " # Increment the query number diff --git a/ferretdb/install.sh b/ferretdb/install.sh index d5ec0b3..2ed19a8 100755 --- a/ferretdb/install.sh +++ b/ferretdb/install.sh @@ -1,8 +1,17 @@ #!/bin/bash -# https://docs.ferretdb.io/installation/ferretdb/deb/ -sudo snap install docker +# install docker +# sudo snap install docker + +# install mongosh +# sudo sudo apt-get install gnupg curl +#curl -fsSL https://www.mongodb.org/static/pgp/server-8.0.asc | \ +# sudo gpg --dearmor --yes -o /usr/share/keyrings/mongodb-server-8.0.gpg +#echo "deb [ arch=amd64,arm64 signed-by=/usr/share/keyrings/mongodb-server-8.0.gpg ] https://repo.mongodb.org/apt/ubuntu noble/mongodb-org/8.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-8.0.list +#sudo apt-get update +#sudo apt-get install -y mongodb-org + # Run postgresql with documentdb as storage extension docker run -d --name postgres \ @@ -20,5 +29,6 @@ docker run -d --name ferretdb \ --link postgres \ -p 27017:27017 \ -e 
FERRETDB_POSTGRESQL_URL=postgres://username:password@postgres:5432/postgres \ + -e FERRETDB_AUTH=false \ ghcr.io/ferretdb/ferretdb:2.0.0 diff --git a/ferretdb/uninstall.sh b/ferretdb/uninstall.sh index 8a4e8a0..a78a7e0 100755 --- a/ferretdb/uninstall.sh +++ b/ferretdb/uninstall.sh @@ -2,3 +2,7 @@ docker stop ferretdb docker rm ferretdb + +docker stop postgres +docker rm postgres +docker volume rm pgdata \ No newline at end of file From f18b09e079e110fbf7d6a578f30a15acf64e410f Mon Sep 17 00:00:00 2001 From: saurabhojha Date: Sun, 23 Mar 2025 05:29:52 +0530 Subject: [PATCH 03/14] Add new line --- ferretdb/queries.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ferretdb/queries.js b/ferretdb/queries.js index b536c49..ebdbc75 100644 --- a/ferretdb/queries.js +++ b/ferretdb/queries.js @@ -2,4 +2,4 @@ db.bluesky.aggregate([ { $group: { _id: "$commit.collection", count: { $sum: 1 } db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create" } }, { $group: { _id: "$commit.collection", count: { $sum: 1 }, users: { $addToSet: "$did" } } }, { $project: { event: "$_id", count: 1, users: { $size: "$users" } } }, { $sort: { count: -1 } } ]); db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create", "commit.collection": { $in: ["app.bsky.feed.post", "app.bsky.feed.repost", "app.bsky.feed.like"] } } }, { $project: { _id: 0, event: "$commit.collection", hour_of_day: { $hour: { $toDate: { $divide: ["$time_us", 1000] } } } } }, { $group: { _id: { event: "$event", hour_of_day: "$hour_of_day" }, count: { $sum: 1 } } }, { $sort: { "_id.hour_of_day": 1, "_id.event": 1 } } ]); db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create", "commit.collection": "app.bsky.feed.post" } }, { $project: { _id: 0, user_id: "$did", timestamp: { $toDate: { $divide: ["$time_us", 1000] } } } }, { $group: { _id: "$user_id", first_post_ts: { $min: "$timestamp" } } }, { $sort: { first_post_ts: 1 } }, { $limit: 3 } 
]); -db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create", "commit.collection": "app.bsky.feed.post" } }, { $project: { _id: 0, user_id: "$did", timestamp: { $toDate: { $divide: ["$time_us", 1000] } } } }, { $group: { _id: "$user_id", min_timestamp: { $min: "$timestamp" }, max_timestamp: { $max: "$timestamp" } } }, { $project: { activity_span: { $dateDiff: { startDate: "$min_timestamp", endDate: "$max_timestamp", unit: "millisecond" } } } }, { $sort: { activity_span: -1 } }, { $limit: 3 } ]); \ No newline at end of file +db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create", "commit.collection": "app.bsky.feed.post" } }, { $project: { _id: 0, user_id: "$did", timestamp: { $toDate: { $divide: ["$time_us", 1000] } } } }, { $group: { _id: "$user_id", min_timestamp: { $min: "$timestamp" }, max_timestamp: { $max: "$timestamp" } } }, { $project: { activity_span: { $dateDiff: { startDate: "$min_timestamp", endDate: "$max_timestamp", unit: "millisecond" } } } }, { $sort: { activity_span: -1 } }, { $limit: 3 } ]); From 9b62d6df975b7760784dc2b638d5e02b7f8dac59 Mon Sep 17 00:00:00 2001 From: saurabhojha Date: Sun, 23 Mar 2025 05:37:33 +0530 Subject: [PATCH 04/14] uncomment mongo db installation --- ferretdb/install.sh | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/ferretdb/install.sh b/ferretdb/install.sh index 2ed19a8..ab29984 100755 --- a/ferretdb/install.sh +++ b/ferretdb/install.sh @@ -2,15 +2,14 @@ # install docker -# sudo snap install docker +sudo snap install docker -# install mongosh -# sudo sudo apt-get install gnupg curl -#curl -fsSL https://www.mongodb.org/static/pgp/server-8.0.asc | \ -# sudo gpg --dearmor --yes -o /usr/share/keyrings/mongodb-server-8.0.gpg -#echo "deb [ arch=amd64,arm64 signed-by=/usr/share/keyrings/mongodb-server-8.0.gpg ] https://repo.mongodb.org/apt/ubuntu noble/mongodb-org/8.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-8.0.list -#sudo 
apt-get update -#sudo apt-get install -y mongodb-org +sudo sudo apt-get install gnupg curl +curl -fsSL https://www.mongodb.org/static/pgp/server-8.0.asc | \ + sudo gpg --dearmor --yes -o /usr/share/keyrings/mongodb-server-8.0.gpg +echo "deb [ arch=amd64,arm64 signed-by=/usr/share/keyrings/mongodb-server-8.0.gpg ] https://repo.mongodb.org/apt/ubuntu noble/mongodb-org/8.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-8.0.list +sudo apt-get update +sudo apt-get install -y mongodb-org # Run postgresql with documentdb as storage extension From f790a1b8104231fdbfb196d7684fdbcb9aef0e7f Mon Sep 17 00:00:00 2001 From: saurabhojha Date: Sun, 23 Mar 2025 05:58:25 +0530 Subject: [PATCH 05/14] enable index only scans --- ferretdb/install.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ferretdb/install.sh b/ferretdb/install.sh index ab29984..136eb3d 100755 --- a/ferretdb/install.sh +++ b/ferretdb/install.sh @@ -20,7 +20,8 @@ docker run -d --name postgres \ -e POSTGRES_PASSWORD=password \ -e POSTGRES_DB=postgres \ -v pgdata:/var/lib/postgresql/data \ - ghcr.io/ferretdb/postgres-documentdb:17-0.102.0-ferretdb-2.0.0 + ghcr.io/ferretdb/postgres-documentdb:17-0.102.0-ferretdb-2.0.0 \ + -c enable_indexscan=on -c enable_indexonlyscan=on # Run ferretdb docker run -d --name ferretdb \ From 4b9264953ee9aa50b2185518860394f7ea232bb9 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 23 Mar 2025 09:52:01 +0000 Subject: [PATCH 06/14] Minor fixups --- README.md | 2 +- ferretdb/install.sh | 8 ++------ ferretdb/uninstall.sh | 4 +++- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 6371ee5..1328379 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,7 @@ While the main benchmark uses a specific machine configuration for reproducibili - [x] VictoriaLogs - [x] SingleStore - [x] GreptimeDB +- [x] FerretDB - [ ] Quickwit - [ ] Meilisearch - [ ] Sneller @@ -146,7 +147,6 @@ While the main benchmark uses a specific 
machine configuration for reproducibili - [ ] OpenText Vertica - [ ] PartiQL - [ ] FishStore -- [ ] FerretDB - [ ] Apache Drill - [ ] GlareDB diff --git a/ferretdb/install.sh b/ferretdb/install.sh index 136eb3d..4efe164 100755 --- a/ferretdb/install.sh +++ b/ferretdb/install.sh @@ -1,7 +1,5 @@ #!/bin/bash - -# install docker sudo snap install docker sudo sudo apt-get install gnupg curl @@ -11,8 +9,7 @@ echo "deb [ arch=amd64,arm64 signed-by=/usr/share/keyrings/mongodb-server-8.0.gp sudo apt-get update sudo apt-get install -y mongodb-org - -# Run postgresql with documentdb as storage extension +# Run PostgreSQL with DocumentDB as storage extension docker run -d --name postgres \ --platform linux/amd64 \ --restart on-failure \ @@ -23,7 +20,7 @@ docker run -d --name postgres \ ghcr.io/ferretdb/postgres-documentdb:17-0.102.0-ferretdb-2.0.0 \ -c enable_indexscan=on -c enable_indexonlyscan=on -# Run ferretdb +# Run FerretDB docker run -d --name ferretdb \ --restart on-failure \ --link postgres \ @@ -31,4 +28,3 @@ docker run -d --name ferretdb \ -e FERRETDB_POSTGRESQL_URL=postgres://username:password@postgres:5432/postgres \ -e FERRETDB_AUTH=false \ ghcr.io/ferretdb/ferretdb:2.0.0 - diff --git a/ferretdb/uninstall.sh b/ferretdb/uninstall.sh index a78a7e0..e36652e 100755 --- a/ferretdb/uninstall.sh +++ b/ferretdb/uninstall.sh @@ -5,4 +5,6 @@ docker rm ferretdb docker stop postgres docker rm postgres -docker volume rm pgdata \ No newline at end of file +docker volume rm pgdata + +sudo snap remove --purge docker From fb98844b0ef67e49ff60dc74e810e35459f52091 Mon Sep 17 00:00:00 2001 From: saurabhojha Date: Sun, 23 Mar 2025 19:53:47 +0530 Subject: [PATCH 07/14] Use sym links --- ferretdb/benchmark.sh | 33 +------ ferretdb/count.sh | 32 +------ ferretdb/create_and_load.sh | 32 +------ ferretdb/data_size.sh | 27 +----- ferretdb/ddl_snappy.js | 7 +- ferretdb/ddl_zstd.js | 7 +- ferretdb/drop_table.sh | 14 +-- ferretdb/index_size.sh | 27 +----- ferretdb/load_data.sh | 77 
+--------------- ferretdb/main.sh | 87 +------------------ ferretdb/queries.js | 6 +- ferretdb/queries_formatted.js | 159 +--------------------------------- ferretdb/query_results.sh | 45 +--------- ferretdb/run_queries.sh | 81 +---------------- ferretdb/total_size.sh | 27 +----- 15 files changed, 15 insertions(+), 646 deletions(-) mode change 100755 => 120000 ferretdb/benchmark.sh mode change 100755 => 120000 ferretdb/count.sh mode change 100755 => 120000 ferretdb/create_and_load.sh mode change 100755 => 120000 ferretdb/data_size.sh mode change 100644 => 120000 ferretdb/ddl_snappy.js mode change 100644 => 120000 ferretdb/ddl_zstd.js mode change 100755 => 120000 ferretdb/drop_table.sh mode change 100755 => 120000 ferretdb/index_size.sh mode change 100755 => 120000 ferretdb/load_data.sh mode change 100755 => 120000 ferretdb/main.sh mode change 100644 => 120000 ferretdb/queries.js mode change 100644 => 120000 ferretdb/queries_formatted.js mode change 100755 => 120000 ferretdb/query_results.sh mode change 100755 => 120000 ferretdb/run_queries.sh mode change 100755 => 120000 ferretdb/total_size.sh diff --git a/ferretdb/benchmark.sh b/ferretdb/benchmark.sh deleted file mode 100755 index 3158efb..0000000 --- a/ferretdb/benchmark.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -# Check if the required arguments are provided -if [[ $# -lt 1 ]]; then - echo "Usage: $0 [RESULT_FILE]" - exit 1 -fi - -# Arguments -DB_NAME="$1" -RESULT_FILE="${2:-}" - -# Construct the query log file name using $DB_NAME -QUERY_LOG_FILE="_query_log_${DB_NAME}.txt" - -# Print the database name -echo "Running queries on database: $DB_NAME" - -# Run queries and log the output -./run_queries.sh "$DB_NAME" 2>&1 | tee "$QUERY_LOG_FILE" - -# Process the query log and prepare the result -RESULT=$(cat "$QUERY_LOG_FILE" | grep -oP 'Execution time: \d+ms' | sed -r 's/Execution time: ([0-9]+)/\1/' | \ -awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," 
}; ++i; }') - -# Output the result -if [[ -n "$RESULT_FILE" ]]; then - echo "$RESULT" > "$RESULT_FILE" - echo "Result written to $RESULT_FILE" -else - echo "$RESULT" -fi \ No newline at end of file diff --git a/ferretdb/benchmark.sh b/ferretdb/benchmark.sh new file mode 120000 index 0000000..d1001ac --- /dev/null +++ b/ferretdb/benchmark.sh @@ -0,0 +1 @@ +../mongodb/benchmark.sh \ No newline at end of file diff --git a/ferretdb/count.sh b/ferretdb/count.sh deleted file mode 100755 index f5c0206..0000000 --- a/ferretdb/count.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -# Check if the required arguments are provided -if [[ $# -lt 2 ]]; then - echo "Usage: $0 " - exit 1 -fi - -# Arguments -DATABASE_NAME="$1" -COLLECTION_NAME="$2" - -# Fetch the document count using mongosh -document_count=$(mongosh --quiet --eval " - const db = db.getSiblingDB('$DATABASE_NAME'); - const count = db.getCollection('$COLLECTION_NAME').stats().count - print(count); -") - -# Debugging information -echo "Database: $DATABASE_NAME" -echo "Collection: $COLLECTION_NAME" -echo "Document count: $document_count" - -# Print the result -if [[ -z "$document_count" ]]; then - echo "Error: Unable to fetch document count. Ensure the database and collection exist." - exit 1 -else - echo $document_count -fi \ No newline at end of file diff --git a/ferretdb/count.sh b/ferretdb/count.sh new file mode 120000 index 0000000..bde12af --- /dev/null +++ b/ferretdb/count.sh @@ -0,0 +1 @@ +../mongodb/count.sh \ No newline at end of file diff --git a/ferretdb/create_and_load.sh b/ferretdb/create_and_load.sh deleted file mode 100755 index 222a989..0000000 --- a/ferretdb/create_and_load.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -# Check if the required arguments are provided -if [[ $# -lt 7 ]]; then - echo "Usage: $0 " - exit 1 -fi - -# Arguments -DB_NAME="$1" -COLLECTION_NAME="$2" -DDL_FILE="$3" -DATA_DIRECTORY="$4" -NUM_FILES="$5" -SUCCESS_LOG="$6" -ERROR_LOG="$7" - -# Validate arguments -[[ ! 
-f "$DDL_FILE" ]] && { echo "Error: DDL file '$DDL_FILE' does not exist."; exit 1; } -[[ ! -d "$DATA_DIRECTORY" ]] && { echo "Error: Data directory '$DATA_DIRECTORY' does not exist."; exit 1; } -[[ ! "$NUM_FILES" =~ ^[0-9]+$ ]] && { echo "Error: NUM_FILES must be a positive integer."; exit 1; } - -# Create database and execute DDL file -mongosh --quiet --eval " - db = db.getSiblingDB('$DB_NAME'); - load('$DDL_FILE'); -" - - -echo "Loading data" -./load_data.sh "$DATA_DIRECTORY" "$DB_NAME" "$COLLECTION_NAME" "$NUM_FILES" "$SUCCESS_LOG" "$ERROR_LOG" diff --git a/ferretdb/create_and_load.sh b/ferretdb/create_and_load.sh new file mode 120000 index 0000000..b38a658 --- /dev/null +++ b/ferretdb/create_and_load.sh @@ -0,0 +1 @@ +../mongodb/create_and_load.sh \ No newline at end of file diff --git a/ferretdb/data_size.sh b/ferretdb/data_size.sh deleted file mode 100755 index 0b3d49a..0000000 --- a/ferretdb/data_size.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -# Check if the required arguments are provided -if [[ $# -lt 2 ]]; then - echo "Usage: $0 " - exit 1 -fi - -# Arguments -DATABASE_NAME="$1" -COLLECTION_NAME="$2" - -# Fetch the totalSize using mongosh -total_size=$(mongosh --quiet --eval " - const db = db.getSiblingDB('$DATABASE_NAME'); - const stats = db.getCollection('$COLLECTION_NAME').stats(); - print(stats.storageSize); -") - -# Print the result -if [[ -z "$total_size" ]]; then - echo "Error: Unable to fetch totalSize. Ensure the database and collection exist." 
- exit 1 -else - echo $total_size -fi \ No newline at end of file diff --git a/ferretdb/data_size.sh b/ferretdb/data_size.sh new file mode 120000 index 0000000..f2db345 --- /dev/null +++ b/ferretdb/data_size.sh @@ -0,0 +1 @@ +../mongodb/data_size.sh \ No newline at end of file diff --git a/ferretdb/ddl_snappy.js b/ferretdb/ddl_snappy.js deleted file mode 100644 index c4577d1..0000000 --- a/ferretdb/ddl_snappy.js +++ /dev/null @@ -1,6 +0,0 @@ -db.createCollection( - "bluesky", - { storageEngine: { wiredTiger: { configString: "block_compressor=snappy" } } } -); - -db.bluesky.createIndex({"kind": 1, "commit.operation": 1, "commit.collection": 1, "did": 1, "time_us": 1}); \ No newline at end of file diff --git a/ferretdb/ddl_snappy.js b/ferretdb/ddl_snappy.js new file mode 120000 index 0000000..15df15c --- /dev/null +++ b/ferretdb/ddl_snappy.js @@ -0,0 +1 @@ +../mongodb/ddl_snappy.js \ No newline at end of file diff --git a/ferretdb/ddl_zstd.js b/ferretdb/ddl_zstd.js deleted file mode 100644 index ae5f59e..0000000 --- a/ferretdb/ddl_zstd.js +++ /dev/null @@ -1,6 +0,0 @@ -db.createCollection( - "bluesky", - { storageEngine: { wiredTiger: { configString: "block_compressor=zstd" } } } -); - -db.bluesky.createIndex({"kind": 1, "commit.operation": 1, "commit.collection": 1, "did": 1, "time_us": 1}); \ No newline at end of file diff --git a/ferretdb/ddl_zstd.js b/ferretdb/ddl_zstd.js new file mode 120000 index 0000000..7f4f3ba --- /dev/null +++ b/ferretdb/ddl_zstd.js @@ -0,0 +1 @@ +../mongodb/ddl_zstd.js \ No newline at end of file diff --git a/ferretdb/drop_table.sh b/ferretdb/drop_table.sh deleted file mode 100755 index 038a9b5..0000000 --- a/ferretdb/drop_table.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -# Check if the required arguments are provided -if [[ $# -lt 1 ]]; then - echo "Usage: $0 " - exit 1 -fi - -DB_NAME="$1" - -echo "Dropping database: $DB_NAME" - -mongosh --eval "use $DB_NAME" --eval "db.dropDatabase()" diff --git a/ferretdb/drop_table.sh 
b/ferretdb/drop_table.sh new file mode 120000 index 0000000..ae2b09f --- /dev/null +++ b/ferretdb/drop_table.sh @@ -0,0 +1 @@ +../mongodb/drop_table.sh \ No newline at end of file diff --git a/ferretdb/index_size.sh b/ferretdb/index_size.sh deleted file mode 100755 index b298d3a..0000000 --- a/ferretdb/index_size.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -# Check if the required arguments are provided -if [[ $# -lt 2 ]]; then - echo "Usage: $0 " - exit 1 -fi - -# Arguments -DATABASE_NAME="$1" -COLLECTION_NAME="$2" - -# Fetch the totalSize using mongosh -total_size=$(mongosh --quiet --eval " - const db = db.getSiblingDB('$DATABASE_NAME'); - const stats = db.getCollection('$COLLECTION_NAME').stats(); - print(stats.totalIndexSize); -") - -# Print the result -if [[ -z "$total_size" ]]; then - echo "Error: Unable to fetch totalSize. Ensure the database and collection exist." - exit 1 -else - echo $total_size -fi \ No newline at end of file diff --git a/ferretdb/index_size.sh b/ferretdb/index_size.sh new file mode 120000 index 0000000..0b13a24 --- /dev/null +++ b/ferretdb/index_size.sh @@ -0,0 +1 @@ +../mongodb/index_size.sh \ No newline at end of file diff --git a/ferretdb/load_data.sh b/ferretdb/load_data.sh deleted file mode 100755 index 7624047..0000000 --- a/ferretdb/load_data.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/bash - -# Check if the required arguments are provided -if [[ $# -lt 6 ]]; then - echo "Usage: $0 " - exit 1 -fi - -# Arguments -DIRECTORY="$1" -DB_NAME="$2" -COLLECTION_NAME="$3" -MAX_FILES="$4" -SUCCESS_LOG="$5" -ERROR_LOG="$6" -MONGO_URI="mongodb://localhost:27017" # Replace with your MongoDB URI if necessary - -# Validate that MAX_FILES is a number -if ! [[ "$MAX_FILES" =~ ^[0-9]+$ ]]; then - echo "Error: must be a positive integer." 
- exit 1 -fi - -# Ensure the log files exist -touch "$SUCCESS_LOG" "$ERROR_LOG" - -# Create a temporary directory for uncompressed files -TEMP_DIR=$(mktemp -d /var/tmp/json_files.XXXXXX) -trap "rm -rf $TEMP_DIR" EXIT # Ensure cleanup on script exit - -# Counter to track processed files -counter=0 - -# Loop through each .json.gz file in the directory -for file in $(ls "$DIRECTORY"/*.json.gz 2>/dev/null | sort); do - if [[ -f "$file" ]]; then - echo "Processing $file..." - counter=$((counter + 1)) - - # Uncompress the file into the TEMP_DIR - uncompressed_file="$TEMP_DIR/$(basename "${file%.gz}")" - gunzip -c "$file" > "$uncompressed_file" - - # Check if uncompression was successful - if [[ $? -ne 0 ]]; then - echo "[$(date '+%Y-%m-%d %H:%M:%S')] Failed to uncompress $file." >> "$ERROR_LOG" - continue - fi - - # Import the uncompressed JSON file into MongoDB - mongoimport --uri "$MONGO_URI" --db "$DB_NAME" --collection "$COLLECTION_NAME" --file "$uncompressed_file" - import_status=$? - - # Check if the import was successful - if [[ $import_status -eq 0 ]]; then - echo "[$(date '+%Y-%m-%d %H:%M:%S')] Successfully imported $uncompressed_file into MongoDB." >> "$SUCCESS_LOG" - else - echo "[$(date '+%Y-%m-%d %H:%M:%S')] Failed to import $uncompressed_file into MongoDB." >> "$ERROR_LOG" - fi - - # Remove the uncompressed file after processing - rm -f "$uncompressed_file" - - # Stop processing if the max number of files is reached - if [[ $counter -ge $MAX_FILES ]]; then - echo "Processed maximum number of files: $MAX_FILES" - break - fi - fi -done - -if [[ $counter -eq 0 ]]; then - echo "No .json.gz files found in the directory." -fi - -echo "All files have been processed." 
\ No newline at end of file diff --git a/ferretdb/load_data.sh b/ferretdb/load_data.sh new file mode 120000 index 0000000..39502fe --- /dev/null +++ b/ferretdb/load_data.sh @@ -0,0 +1 @@ +../mongodb/load_data.sh \ No newline at end of file diff --git a/ferretdb/main.sh b/ferretdb/main.sh deleted file mode 100755 index 57d79ac..0000000 --- a/ferretdb/main.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/bash - -DEFAULT_CHOICE=ask -DEFAULT_DATA_DIRECTORY=~/data/bluesky - -# Allow the user to optionally provide the scale factor ("choice") as an argument -CHOICE="${1:-$DEFAULT_CHOICE}" - -# Allow the user to optionally provide the data directory as an argument -DATA_DIRECTORY="${2:-$DEFAULT_DATA_DIRECTORY}" - -# Define success and error log files -SUCCESS_LOG="${3:-success.log}" -ERROR_LOG="${4:-error.log}" - -# Define prefix for output files -OUTPUT_PREFIX="${5:-_m6i.8xlarge}" - -# Check if the directory exists -if [[ ! -d "$DATA_DIRECTORY" ]]; then - echo "Error: Data directory '$DATA_DIRECTORY' does not exist." - exit 1 -fi - -if [ "$CHOICE" = "ask" ]; then - echo "Select the dataset size to benchmark:" - echo "1) 1m (default)" - echo "2) 10m" - echo "3) 100m" - echo "4) 1000m" - echo "5) all" - read -p "Enter the number corresponding to your choice: " CHOICE -fi - -./install.sh - -benchmark() { - local size=$1 - local compression=$2 - # Check DATA_DIRECTORY contains the required number of files to run the benchmark - file_count=$(find "$DATA_DIRECTORY" -type f | wc -l) - if (( file_count < size )); then - echo "Error: Not enough files in '$DATA_DIRECTORY'. Required: $size, Found: $file_count." 
- exit 1 - fi - ./create_and_load.sh "bluesky_${size}m_${compression}" bluesky "ddl_${compression}.js" "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG" - ./total_size.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.total_size" - ./data_size.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.data_size" - ./index_size.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.index_size" - ./count.sh "bluesky_${size}m_${compression}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.count" - #./query_results.sh "bluesky_${size}m_${compression}" | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.query_results" - ./index_usage.sh "bluesky_${size}m_${compression}" | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.index_usage" - ./benchmark.sh "bluesky_${size}m_${compression}" "${OUTPUT_PREFIX}_bluesky_${size}m_${compression}.results_runtime" - ./drop_table.sh "bluesky_${size}m_${compression}" -} - -case $choice in - 2) - benchmark 10 snappy - benchmark 10 zstd - ;; - 3) - benchmark 100 snappy - benchmark 100 zstd - ;; - 4) - benchmark 1000 snappy - benchmark 1000 zstd - ;; - 5) - benchmark 1 snappy - benchmark 1 zstd - benchmark 10 snappy - benchmark 10 zstd - benchmark 100 snappy - benchmark 100 zstd - benchmark 1000 snappy - benchmark 1000 zstd - ;; - *) - benchmark 1 snappy - benchmark 1 zstd - ;; -esac - -./uninstall.sh diff --git a/ferretdb/main.sh b/ferretdb/main.sh new file mode 120000 index 0000000..3b9d1af --- /dev/null +++ b/ferretdb/main.sh @@ -0,0 +1 @@ +../mongodb/main.sh \ No newline at end of file diff --git a/ferretdb/queries.js b/ferretdb/queries.js deleted file mode 100644 index ebdbc75..0000000 --- a/ferretdb/queries.js +++ /dev/null @@ -1,5 +0,0 @@ -db.bluesky.aggregate([ { $group: { _id: "$commit.collection", count: { $sum: 1 } } }, { $sort: { count: -1 } } ]); 
-db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create" } }, { $group: { _id: "$commit.collection", count: { $sum: 1 }, users: { $addToSet: "$did" } } }, { $project: { event: "$_id", count: 1, users: { $size: "$users" } } }, { $sort: { count: -1 } } ]); -db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create", "commit.collection": { $in: ["app.bsky.feed.post", "app.bsky.feed.repost", "app.bsky.feed.like"] } } }, { $project: { _id: 0, event: "$commit.collection", hour_of_day: { $hour: { $toDate: { $divide: ["$time_us", 1000] } } } } }, { $group: { _id: { event: "$event", hour_of_day: "$hour_of_day" }, count: { $sum: 1 } } }, { $sort: { "_id.hour_of_day": 1, "_id.event": 1 } } ]); -db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create", "commit.collection": "app.bsky.feed.post" } }, { $project: { _id: 0, user_id: "$did", timestamp: { $toDate: { $divide: ["$time_us", 1000] } } } }, { $group: { _id: "$user_id", first_post_ts: { $min: "$timestamp" } } }, { $sort: { first_post_ts: 1 } }, { $limit: 3 } ]); -db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create", "commit.collection": "app.bsky.feed.post" } }, { $project: { _id: 0, user_id: "$did", timestamp: { $toDate: { $divide: ["$time_us", 1000] } } } }, { $group: { _id: "$user_id", min_timestamp: { $min: "$timestamp" }, max_timestamp: { $max: "$timestamp" } } }, { $project: { activity_span: { $dateDiff: { startDate: "$min_timestamp", endDate: "$max_timestamp", unit: "millisecond" } } } }, { $sort: { activity_span: -1 } }, { $limit: 3 } ]); diff --git a/ferretdb/queries.js b/ferretdb/queries.js new file mode 120000 index 0000000..e2879ea --- /dev/null +++ b/ferretdb/queries.js @@ -0,0 +1 @@ +../mongodb/queries.js \ No newline at end of file diff --git a/ferretdb/queries_formatted.js b/ferretdb/queries_formatted.js deleted file mode 100644 index 616ed48..0000000 --- a/ferretdb/queries_formatted.js +++ /dev/null @@ 
-1,158 +0,0 @@ -// ------------------------------------------------------------------------------------------------------------------------ -// -- Q1 - Top event types -// --------------------------------------------------------------------------------------------------------------------- -db.bluesky.aggregate([ - { - $group: { - _id: "$commit.collection", - count: { $sum: 1 } - } - }, - { - $sort: { count: -1 } - } -]); - -// --------------------------------------------------------------------------------------------------------------------- -// -- Q2 - Top event types together with unique users per event type -// --------------------------------------------------------------------------------------------------------------------- -db.bluesky.aggregate([ - { - $match: { - "kind": "commit", - "commit.operation": "create" - } - }, - - { - $group: { - _id: "$commit.collection", - count: { $sum: 1 }, - users: { $addToSet: "$did" } - } - }, - { - $project: { - event: "$_id", - count: 1, - users: { $size: "$users" } - } - }, - { - $sort: { count: -1 } - } -]); - -// --------------------------------------------------------------------------------------------------------------------- -// -- Q3 - When do people use BlueSky -// --------------------------------------------------------------------------------------------------------------------- -db.bluesky.aggregate([ - { - $match: { - "kind": "commit", - "commit.operation": "create", - "commit.collection": { - $in: ["app.bsky.feed.post", "app.bsky.feed.repost", "app.bsky.feed.like"] - } - } - }, - { - $project: { - _id: 0, - event: "$commit.collection", - hour_of_day: { - $hour: { - $toDate: { $divide: ["$time_us", 1000] } - } - } - } - }, - { - $group: { - _id: { event: "$event", hour_of_day: "$hour_of_day" }, - count: { $sum: 1 } - } - }, - { - $sort: { - "_id.hour_of_day": 1, - "_id.event": 1 - } - } -]); - -// 
--------------------------------------------------------------------------------------------------------------------- -// -- Q4 - top 3 post veterans -// --------------------------------------------------------------------------------------------------------------------- -db.bluesky.aggregate([ - { - $match: { - "kind": "commit", - "commit.operation": "create", - "commit.collection": "app.bsky.feed.post" - } - }, - { - $project: { - _id: 0, - user_id: "$did", - timestamp: { $toDate: { $divide: ["$time_us", 1000] } } - } - }, - { - $group: { - _id: "$user_id", - first_post_ts: { $min: "$timestamp" } - } - }, - { - $sort: { first_post_ts: 1 } - }, - { - $limit: 3 - } -]); - -// --------------------------------------------------------------------------------------------------------------------- -// -- Q5 - top 3 users with longest activity -// --------------------------------------------------------------------------------------------------------------------- -db.bluesky.aggregate([ - { - $match: { - "kind": "commit", - "commit.operation": "create", - "commit.collection": "app.bsky.feed.post" - } - }, - { - $project: { - _id: 0, - user_id: "$did", - timestamp: { $toDate: { $divide: ["$time_us", 1000] } } - } - }, - { - $group: { - _id: "$user_id", - min_timestamp: { $min: "$timestamp" }, - max_timestamp: { $max: "$timestamp" } - } - }, - { - $project: { - activity_span: { - $dateDiff: { - startDate: "$min_timestamp", - endDate: "$max_timestamp", - unit: "millisecond" - } - } - } - }, - { - $sort: { activity_span: -1 } - }, - { - $limit: 3 - } -]); diff --git a/ferretdb/queries_formatted.js b/ferretdb/queries_formatted.js new file mode 120000 index 0000000..425dc9a --- /dev/null +++ b/ferretdb/queries_formatted.js @@ -0,0 +1 @@ +../mongodb/queries_formatted.js \ No newline at end of file diff --git a/ferretdb/query_results.sh b/ferretdb/query_results.sh deleted file mode 100755 index 8c7c247..0000000 --- a/ferretdb/query_results.sh +++ /dev/null @@ -1,44 +0,0 @@ 
-#!/bin/bash - -# Check if the required arguments are provided -if [[ $# -lt 1 ]]; then - echo "Usage: $0 " - exit 1 -fi - -# Arguments -DB_NAME="$1" - -QUERY_NUM=1 - -# File containing MongoDB queries (replace 'queries.js' with your file) -QUERY_FILE="queries.js" - -# Check if the query file exists -if [[ ! -f "$QUERY_FILE" ]]; then - echo "Error: Query file '$QUERY_FILE' does not exist." - exit 1 -fi - -# Read and execute each query -cat "$QUERY_FILE" | while read -r query; do - - # Print the query - echo "------------------------------------------------------------------------------------------------------------------------" - echo "Result for query Q$QUERY_NUM:" - echo - - # Escape the query for safe passing to mongosh - ESCAPED_QUERY=$(echo "$query" | sed 's/\([\"\\]\)/\\\1/g' | sed 's/\$/\\$/g') - - mongosh --eval " - const db = db.getSiblingDB('$DB_NAME'); - const result = eval(\"$ESCAPED_QUERY\"); - printjson(result); - " - - - # Increment the query number - QUERY_NUM=$((QUERY_NUM + 1)) - -done \ No newline at end of file diff --git a/ferretdb/query_results.sh b/ferretdb/query_results.sh new file mode 120000 index 0000000..799f90e --- /dev/null +++ b/ferretdb/query_results.sh @@ -0,0 +1 @@ +../mongodb/query_results.sh \ No newline at end of file diff --git a/ferretdb/run_queries.sh b/ferretdb/run_queries.sh deleted file mode 100755 index 43c670a..0000000 --- a/ferretdb/run_queries.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash - -# Check if the required arguments are provided -if [[ $# -lt 1 ]]; then - echo "Usage: $0 " - exit 1 -fi - -# Arguments -DB_NAME="$1" - -# Number of tries for each query -TRIES=3 - -# File containing MongoDB queries (replace 'queries.js' with your file) -QUERY_FILE="queries.js" - -# Check if the query file exists -if [[ ! -f "$QUERY_FILE" ]]; then - echo "Error: Query file '$QUERY_FILE' does not exist." - exit 1 -fi - -# Set the internalQueryMaxAddToSetBytes parameter to 1 GB -echo "Setting internalQueryMaxAddToSetBytes to 1 GB..." 
-mongosh --quiet --eval " - const result = db.adminCommand({ setParameter: 1, internalQueryMaxAddToSetBytes: 1073741824 }); - if (result.ok !== 1) { - print('Failed to set internalQueryMaxAddToSetBytes: ' + JSON.stringify(result)); - quit(1); - } else { - print('Successfully set internalQueryMaxAddToSetBytes to 1 GB'); - } -" - -# Set the internalQueryPlannerGenerateCoveredWholeIndexScans parameter to true -echo "Setting internalQueryPlannerGenerateCoveredWholeIndexScans to true..." -mongosh --quiet --eval " - const result = db.adminCommand({ setParameter: 1, internalQueryPlannerGenerateCoveredWholeIndexScans: true }); - if (result.ok !== 1) { - print('Failed to set internalQueryPlannerGenerateCoveredWholeIndexScans: ' + JSON.stringify(result)); - quit(1); - } else { - print('Successfully set internalQueryPlannerGenerateCoveredWholeIndexScans to true'); - } -" - -# Read and execute each query -cat "$QUERY_FILE" | while read -r query; do - - # Clear the Linux file system cache - echo "Clearing file system cache..." - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null - echo "File system cache cleared." - - # Print the query - echo "Running query: $query" - - # Escape the query for safe passing to mongosh - ESCAPED_QUERY=$(echo "$query" | sed 's/\([\"\\]\)/\\\1/g' | sed 's/\$/\\$/g') - - # Execute the query multiple times - for i in $(seq 1 $TRIES); do - mongosh --quiet --eval " - const db = db.getSiblingDB('$DB_NAME'); - const start = new Date(); - const result = eval(\"$ESCAPED_QUERY\"); - // Force query execution -> When using commands like aggregate() or find(), - // the query is not fully executed until the data is actually fetched or processed. 
- if (Array.isArray(result)) { - result.length; // Access the length to force evaluation for arrays - } else if (typeof result === 'object' && typeof result.toArray === 'function') { - result.toArray(); // Force execution for cursors - } - const end = new Date(); - print('Execution time: ' + (end.getTime() - start.getTime()) + 'ms'); - " - done -done \ No newline at end of file diff --git a/ferretdb/run_queries.sh b/ferretdb/run_queries.sh new file mode 120000 index 0000000..a7507e5 --- /dev/null +++ b/ferretdb/run_queries.sh @@ -0,0 +1 @@ +../mongodb/run_queries.sh \ No newline at end of file diff --git a/ferretdb/total_size.sh b/ferretdb/total_size.sh deleted file mode 100755 index e05a6ca..0000000 --- a/ferretdb/total_size.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -# Check if the required arguments are provided -if [[ $# -lt 2 ]]; then - echo "Usage: $0 " - exit 1 -fi - -# Arguments -DATABASE_NAME="$1" -COLLECTION_NAME="$2" - -# Fetch the totalSize using mongosh -total_size=$(mongosh --quiet --eval " - const db = db.getSiblingDB('$DATABASE_NAME'); - const stats = db.getCollection('$COLLECTION_NAME').stats(); - print(stats.totalSize); -") - -# Print the result -if [[ -z "$total_size" ]]; then - echo "Error: Unable to fetch totalSize. Ensure the database and collection exist." 
- exit 1 -else - echo $total_size -fi \ No newline at end of file diff --git a/ferretdb/total_size.sh b/ferretdb/total_size.sh new file mode 120000 index 0000000..2dbec70 --- /dev/null +++ b/ferretdb/total_size.sh @@ -0,0 +1 @@ +../mongodb/total_size.sh \ No newline at end of file From d3b7f8d205ebc84f1ef49a8a215853ce74785a8d Mon Sep 17 00:00:00 2001 From: saurabhojha Date: Sun, 23 Mar 2025 20:04:59 +0530 Subject: [PATCH 08/14] Remove mongodb in cleanup --- ferretdb/uninstall.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ferretdb/uninstall.sh b/ferretdb/uninstall.sh index e36652e..307a142 100755 --- a/ferretdb/uninstall.sh +++ b/ferretdb/uninstall.sh @@ -8,3 +8,4 @@ docker rm postgres docker volume rm pgdata sudo snap remove --purge docker +sudo apt-get remove -y mongodb-org \ No newline at end of file From ed8ae3980e7213c78ebbece4d7f88b36432ed77c Mon Sep 17 00:00:00 2001 From: saurabhojha Date: Sun, 23 Mar 2025 20:06:49 +0530 Subject: [PATCH 09/14] Add newline --- mongodb/queries.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mongodb/queries.js b/mongodb/queries.js index b536c49..ebdbc75 100644 --- a/mongodb/queries.js +++ b/mongodb/queries.js @@ -2,4 +2,4 @@ db.bluesky.aggregate([ { $group: { _id: "$commit.collection", count: { $sum: 1 } db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create" } }, { $group: { _id: "$commit.collection", count: { $sum: 1 }, users: { $addToSet: "$did" } } }, { $project: { event: "$_id", count: 1, users: { $size: "$users" } } }, { $sort: { count: -1 } } ]); db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create", "commit.collection": { $in: ["app.bsky.feed.post", "app.bsky.feed.repost", "app.bsky.feed.like"] } } }, { $project: { _id: 0, event: "$commit.collection", hour_of_day: { $hour: { $toDate: { $divide: ["$time_us", 1000] } } } } }, { $group: { _id: { event: "$event", hour_of_day: "$hour_of_day" }, count: { $sum: 1 } } }, { $sort: { 
"_id.hour_of_day": 1, "_id.event": 1 } } ]); db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create", "commit.collection": "app.bsky.feed.post" } }, { $project: { _id: 0, user_id: "$did", timestamp: { $toDate: { $divide: ["$time_us", 1000] } } } }, { $group: { _id: "$user_id", first_post_ts: { $min: "$timestamp" } } }, { $sort: { first_post_ts: 1 } }, { $limit: 3 } ]); -db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create", "commit.collection": "app.bsky.feed.post" } }, { $project: { _id: 0, user_id: "$did", timestamp: { $toDate: { $divide: ["$time_us", 1000] } } } }, { $group: { _id: "$user_id", min_timestamp: { $min: "$timestamp" }, max_timestamp: { $max: "$timestamp" } } }, { $project: { activity_span: { $dateDiff: { startDate: "$min_timestamp", endDate: "$max_timestamp", unit: "millisecond" } } } }, { $sort: { activity_span: -1 } }, { $limit: 3 } ]); \ No newline at end of file +db.bluesky.aggregate([ { $match: { "kind": "commit", "commit.operation": "create", "commit.collection": "app.bsky.feed.post" } }, { $project: { _id: 0, user_id: "$did", timestamp: { $toDate: { $divide: ["$time_us", 1000] } } } }, { $group: { _id: "$user_id", min_timestamp: { $min: "$timestamp" }, max_timestamp: { $max: "$timestamp" } } }, { $project: { activity_span: { $dateDiff: { startDate: "$min_timestamp", endDate: "$max_timestamp", unit: "millisecond" } } } }, { $sort: { activity_span: -1 } }, { $limit: 3 } ]); From b4eddb1b9e4bdf009844d0c867807bd0cc112c82 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 24 Mar 2025 09:32:26 +0000 Subject: [PATCH 10/14] Uninstall script: Use reverse order of install script --- ferretdb/uninstall.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ferretdb/uninstall.sh b/ferretdb/uninstall.sh index 307a142..3a36fea 100755 --- a/ferretdb/uninstall.sh +++ b/ferretdb/uninstall.sh @@ -7,5 +7,6 @@ docker stop postgres docker rm postgres docker volume rm pgdata +sudo apt-get 
remove -y mongodb-org + sudo snap remove --purge docker -sudo apt-get remove -y mongodb-org \ No newline at end of file From bf4056be2ab1ef3a50cf52c3c3acca13365ac9ee Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 24 Mar 2025 09:36:34 +0000 Subject: [PATCH 11/14] Fix choice selection --- mongodb/main.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mongodb/main.sh b/mongodb/main.sh index 57d79ac..54c2ece 100755 --- a/mongodb/main.sh +++ b/mongodb/main.sh @@ -54,7 +54,7 @@ benchmark() { ./drop_table.sh "bluesky_${size}m_${compression}" } -case $choice in +case $CHOICE in 2) benchmark 10 snappy benchmark 10 zstd From b5007813a11be48185ec8e2f0869d294603b1ba5 Mon Sep 17 00:00:00 2001 From: saurabhojha Date: Mon, 24 Mar 2025 15:58:02 +0530 Subject: [PATCH 12/14] Resolve comments and remove unsettable parameters --- ferretdb/index_usage.sh | 15 ++-------- ferretdb/run_queries.sh | 61 ++++++++++++++++++++++++++++++++++++++++- mongodb/index_usage.sh | 2 ++ mongodb/run_queries.sh | 2 ++ 4 files changed, 67 insertions(+), 13 deletions(-) mode change 120000 => 100755 ferretdb/run_queries.sh diff --git a/ferretdb/index_usage.sh b/ferretdb/index_usage.sh index 19676f8..6ada891 100755 --- a/ferretdb/index_usage.sh +++ b/ferretdb/index_usage.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file other than that, please also change it in mongodb/index_usage.sh as well + # Check if the required arguments are provided if [[ $# -lt 1 ]]; then echo "Usage: $0 " @@ -20,18 +22,6 @@ if [[ ! -f "$QUERY_FILE" ]]; then exit 1 fi -# Set the internalQueryPlannerGenerateCoveredWholeIndexScans parameter to true -echo "Setting internalQueryPlannerGenerateCoveredWholeIndexScans to true..." 
-mongosh --quiet --eval " - const result = db.adminCommand({ setParameter: 1, internalQueryPlannerGenerateCoveredWholeIndexScans: true }); - if (result.ok !== 1) { - print('Failed to set internalQueryPlannerGenerateCoveredWholeIndexScans: ' + JSON.stringify(result)); - quit(1); - } else { - print('Successfully set internalQueryPlannerGenerateCoveredWholeIndexScans to true'); - } -" - cat "$QUERY_FILE" | while read -r query; do # Print the query number @@ -45,6 +35,7 @@ cat "$QUERY_FILE" | while read -r query; do # Escape the modified query for safe passing to mongosh ESCAPED_QUERY=$(echo "$MODIFIED_QUERY" | sed 's/\([\"\\]\)/\\\1/g' | sed 's/\$/\\$/g') + # Due to a difference in query planner outputs from postgresql and mongodb, entire json is printed here. mongosh --quiet --eval " const db = db.getSiblingDB('$DB_NAME'); const result = eval(\"$ESCAPED_QUERY\"); diff --git a/ferretdb/run_queries.sh b/ferretdb/run_queries.sh deleted file mode 120000 index a7507e5..0000000 --- a/ferretdb/run_queries.sh +++ /dev/null @@ -1 +0,0 @@ -../mongodb/run_queries.sh \ No newline at end of file diff --git a/ferretdb/run_queries.sh b/ferretdb/run_queries.sh new file mode 100755 index 0000000..678c99d --- /dev/null +++ b/ferretdb/run_queries.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# If you change something in this file other than that, please also change it in mongodb/run_queries.sh as well + + +# Check if the required arguments are provided +if [[ $# -lt 1 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Arguments +DB_NAME="$1" + +# Number of tries for each query +TRIES=3 + +# File containing MongoDB queries (replace 'queries.js' with your file) +QUERY_FILE="queries.js" + +# Check if the query file exists +if [[ ! -f "$QUERY_FILE" ]]; then + echo "Error: Query file '$QUERY_FILE' does not exist." 
+ exit 1 +fi + +# If you change something in this file, please also change it in mongodb/run_queries.sh as well +# Read and execute each query +cat "$QUERY_FILE" | while read -r query; do + + # Clear the Linux file system cache + echo "Clearing file system cache..." + sync + echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + echo "File system cache cleared." + + # Print the query + echo "Running query: $query" + + # Escape the query for safe passing to mongosh + ESCAPED_QUERY=$(echo "$query" | sed 's/\([\"\\]\)/\\\1/g' | sed 's/\$/\\$/g') + + # Execute the query multiple times + for i in $(seq 1 $TRIES); do + mongosh --quiet --eval " + const db = db.getSiblingDB('$DB_NAME'); + const start = new Date(); + const result = eval(\"$ESCAPED_QUERY\"); + // Force query execution -> When using commands like aggregate() or find(), + // the query is not fully executed until the data is actually fetched or processed. + if (Array.isArray(result)) { + result.length; // Access the length to force evaluation for arrays + } else if (typeof result === 'object' && typeof result.toArray === 'function') { + result.toArray(); // Force execution for cursors + } + const end = new Date(); + print('Execution time: ' + (end.getTime() - start.getTime()) + 'ms'); + " + done +done \ No newline at end of file diff --git a/mongodb/index_usage.sh b/mongodb/index_usage.sh index f2c2ad7..4124311 100755 --- a/mongodb/index_usage.sh +++ b/mongodb/index_usage.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please also change it in ferretdb/index_usage.sh as well + # Check if the required arguments are provided if [[ $# -lt 1 ]]; then echo "Usage: $0 " diff --git a/mongodb/run_queries.sh b/mongodb/run_queries.sh index 43c670a..e2ac655 100755 --- a/mongodb/run_queries.sh +++ b/mongodb/run_queries.sh @@ -1,5 +1,7 @@ #!/bin/bash +# If you change something in this file, please also change it in ferretdb/run_queries.sh as well + # Check if the required arguments are provided if 
[[ $# -lt 1 ]]; then echo "Usage: $0 " From 6f06e7046bdf71086f2df5c59dedb9e32ebce357 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 27 Mar 2025 07:47:02 +0000 Subject: [PATCH 13/14] Add results --- ...6i.8xlarge_bluesky_100m_snappy.index_usage | 773 ++++++++++++++++++ ..._m6i.8xlarge_bluesky_100m_zstd.index_usage | 773 ++++++++++++++++++ ...m6i.8xlarge_bluesky_10m_snappy.index_usage | 746 +++++++++++++++++ .../_m6i.8xlarge_bluesky_10m_zstd.index_usage | 746 +++++++++++++++++ ..._m6i.8xlarge_bluesky_1m_snappy.index_usage | 733 +++++++++++++++++ .../_m6i.8xlarge_bluesky_1m_zstd.index_usage | 733 +++++++++++++++++ .../m6i.8xlarge_bluesky_100m_snappy.json | 24 + .../m6i.8xlarge_bluesky_100m_zstd.json | 24 + .../m6i.8xlarge_bluesky_10m_snappy.json | 24 + .../results/m6i.8xlarge_bluesky_10m_zstd.json | 24 + .../m6i.8xlarge_bluesky_1m_snappy.json | 24 + .../results/m6i.8xlarge_bluesky_1m_zstd.json | 24 + 12 files changed, 4648 insertions(+) create mode 100644 ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_100m_snappy.index_usage create mode 100644 ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_100m_zstd.index_usage create mode 100644 ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_10m_snappy.index_usage create mode 100644 ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_10m_zstd.index_usage create mode 100644 ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_1m_snappy.index_usage create mode 100644 ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_1m_zstd.index_usage create mode 100644 ferretdb/results/m6i.8xlarge_bluesky_100m_snappy.json create mode 100644 ferretdb/results/m6i.8xlarge_bluesky_100m_zstd.json create mode 100644 ferretdb/results/m6i.8xlarge_bluesky_10m_snappy.json create mode 100644 ferretdb/results/m6i.8xlarge_bluesky_10m_zstd.json create mode 100644 ferretdb/results/m6i.8xlarge_bluesky_1m_snappy.json create mode 100644 ferretdb/results/m6i.8xlarge_bluesky_1m_zstd.json diff --git 
a/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_100m_snappy.index_usage b/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_100m_snappy.index_usage new file mode 100644 index 0000000..f31a980 --- /dev/null +++ b/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_100m_snappy.index_usage @@ -0,0 +1,773 @@ +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q1: + +{ + queryPlanner: { + Plan: { + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 50924089.98, + 'Plan Rows': 94379953, + Plans: [ + { + Alias: 'agg_stage_0', + 'Startup Cost': 22836927.54, + 'Plan Rows': 94379953, + 'Plan Width': 64, + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 27541178.32, + Plans: [ + { + Strategy: 'Hashed', + 'Partial Mode': 'Simple', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 22836927.54, + 'Total Cost': 26833328.67, + 'Plan Rows': 94379953, + 'Plan Width': 128, + 'Node Type': 'Aggregate', + 'Parent Relationship': 'Subquery', + 'Group Key': [ + "documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true)" + ], + 'Planned Partitions': 256, + Plans: [ + { + 'Node Type': 'Seq Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Relation Name': 'documents_16', + Alias: 'collection', + 'Startup Cost': 0, + 'Total Cost': 8267022.29, + 'Plan Rows': 94379953, + 'Plan Width': 32 + } + ] + } + ], + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer' + } + ], + 'Startup Cost': 50688140.1, + 'Plan Width': 64, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_0.c1, agg_stage_0.c2, agg_stage_0.c3, agg_stage_0.c4), 'BSONHEX1000000010636f756e7400ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + 'Node Type': 'Sort' + }, 
+ JIT: { + Functions: 5, + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$group': { + _id: '$commit.collection', + count: { + '$sum': 1 + } + } + }, + { + '$sort': { + count: -1 + } + } + ], + cursor: {}, + '$db': 'bluesky_100m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q2: + +{ + queryPlanner: { + Plan: { + 'Parallel Aware': false, + 'Async Capable': false, + 'Plan Width': 64, + Plans: [ + { + 'Total Cost': 12059315.15, + 'Plan Width': 64, + Plans: [ + { + Strategy: 'Sorted', + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Total Cost': 11764377.8, + 'Plan Rows': 23594988, + 'Plan Width': 192, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true))" + ], + Plans: [ + { + 'Node Type': 'Gather Merge', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Startup Cost': 8367490.46, + 'Plan Rows': 23594988, + 'Plan Width': 539, + 'Workers Planned': 2, + 'Async Capable': false, + 'Total Cost': 11115515.63, + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 8366490.43, + 'Total Cost': 8391068.54, + 'Plan Rows': 9831245, + 'Plan Width': 539, + 'Sort Key': [ + "(documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true))" + ], + 'Node Type': 'Sort', + Plans: [ + { + 'Startup Cost': 5898.75, + 'Recheck Cond': 
"((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson))", + Plans: [ + { + 'Plan Rows': 9438, + 'Plan Width': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson))", + 'Parallel Aware': false, + 'Startup Cost': 0, + 'Total Cost': 0, + 'Node Type': 'Bitmap Index Scan', + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1' + } + ], + 'Node Type': 'Bitmap Heap Scan', + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Relation Name': 'documents_16', + Alias: 'collection', + 'Total Cost': 67223.8, + 'Plan Rows': 9831245, + 'Plan Width': 539, + 'Parallel Aware': true + } + ] + } + ] + } + ], + 'Node Type': 'Aggregate', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 8367490.46 + } + ], + 'Node Type': 'Subquery Scan', + 'Async Capable': false, + Alias: 'agg_stage_1', + 'Plan Rows': 23594988, + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Startup Cost': 8367490.46 + } + ], + 'Node Type': 'Sort', + 'Startup Cost': 17610105.68, + 'Total Cost': 17669093.15, + 'Plan Rows': 23594988, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_api_catalog.bson_dollar_project(documentdb_core.bson_repath_and_build(agg_stage_1.c1, agg_stage_1.c2, agg_stage_1.c3, agg_stage_1.c4, agg_stage_1.c5, agg_stage_1.c6), 'BSONHEX3e000000026576656e740005000000245f69640010636f756e7400010000000375736572730017000000022473697a650007000000247573657273000000'::documentdb_core.bson), 
'BSONHEX1000000010636f756e7400ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ] + }, + JIT: { + Functions: 8, + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create' + } + }, + { + '$group': { + _id: '$commit.collection', + count: { + '$sum': 1 + }, + users: { + '$addToSet': '$did' + } + } + }, + { + '$project': { + event: '$_id', + count: 1, + users: { + '$size': '$users' + } + } + }, + { + '$sort': { + count: -1 + } + } + ], + cursor: {}, + '$db': 'bluesky_100m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q3: + +{ + queryPlanner: { + Plan: { + 'Startup Cost': 3122411.7, + 'Total Cost': 3142074.18, + 'Plan Width': 96, + Plans: [ + { + 'Node Type': 'Subquery Scan', + 'Parallel Aware': false, + Alias: 'agg_stage_2', + 'Startup Cost': 668409.85, + 'Total Cost': 1011888.93, + 'Plan Rows': 7864995, + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Plan Width': 96, + Plans: [ + { + 'Plan Rows': 7864995, + 'Planned Partitions': 256, + Plans: [ + { + 'Plan Width': 32, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@*=) 
'BSONHEX6d00000004636f6d6d69742e636f6c6c656374696f6e0055000000023000130000006170702e62736b792e666565642e706f737400023100150000006170702e62736b792e666565642e7265706f737400023200130000006170702e62736b792e666565642e6c696b65000000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Heap Scan', + 'Parallel Aware': false, + 'Plan Rows': 7864995, + Plans: [ + { + 'Node Type': 'Bitmap Index Scan', + 'Parallel Aware': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Startup Cost': 0, + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Total Cost': 0, + 'Plan Rows': 94, + 'Plan Width': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@*=) 'BSONHEX6d00000004636f6d6d69742e636f6c6c656374696f6e0055000000023000130000006170702e62736b792e666565642e706f737400023100150000006170702e62736b792e666565642e7265706f737400023200130000006170702e62736b792e666565642e6c696b65000000'::documentdb_core.bson))" + } + ], + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Relation Name': 'documents_16', + Alias: 'collection', + 'Startup Cost': 1966.25, + 'Total Cost': 41668.06 + } + ], + 'Node Type': 'Aggregate', + Strategy: 'Hashed', + 'Partial Mode': 'Simple', + 'Startup Cost': 668409.85, + 'Plan Width': 128, + 'Group Key': [ + "documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX7d000000105f69640000000000026576656e74001300000024636f6d6d69742e636f6c6c656374696f6e0003686f75725f6f665f64617900440000000324686f757200380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e80300000000000000'::documentdb_core.bson), 
'BSONHEX3c000000030035000000026576656e740007000000246576656e740002686f75725f6f665f646179000d00000024686f75725f6f665f646179000000'::documentdb_core.bson, true)" + ], + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 913576.49 + } + ] + } + ], + 'Plan Rows': 7864995, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX1a000000105f69642e686f75725f6f665f646179000100000000'::documentdb_core.bson)) NULLS FIRST", + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX14000000105f69642e6576656e74000100000000'::documentdb_core.bson)) NULLS FIRST" + ], + 'Node Type': 'Sort', + 'Parallel Aware': false, + 'Async Capable': false + }, + JIT: { + Functions: 7, + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': { + '$in': [ + 'app.bsky.feed.post', + 'app.bsky.feed.repost', + 'app.bsky.feed.like' + ] + } + } + }, + { + '$project': { + _id: 0, + event: '$commit.collection', + hour_of_day: { + '$hour': { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + } + }, + { + '$group': { + _id: { + event: '$event', + hour_of_day: '$hour_of_day' + }, + count: { + '$sum': 1 + } + } + }, + { + '$sort': { + '_id.hour_of_day': 1, + '_id.event': 1 + } + } + ], + cursor: {}, + '$db': 'bluesky_100m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} 
+------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q4: + +{ + queryPlanner: { + JIT: { + Functions: 11, + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + } + }, + Plan: { + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 10661331.63, + 'Plan Rows': 11797494, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX180000001066697273745f706f73745f7473000100000000'::documentdb_core.bson)) NULLS FIRST" + ], + Plans: [ + { + 'Plan Width': 64, + 'Async Capable': false, + Plans: [ + { + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + 'Node Type': 'Aggregate', + Strategy: 'Sorted', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 10036951.47, + 'Plan Rows': 11797494, + 'Plan Width': 128, + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 10036951.47, + 'Total Cost': 10066445.21, + 'Sort Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 
'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + Plans: [ + { + 'Async Capable': false, + 'Relation Name': 'documents_16', + 'Startup Cost': 2949.37, + 'Plan Rows': 11797494, + 'Plan Width': 539, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + 'Parent Relationship': 'Outer', + Alias: 'collection', + 'Total Cost': 62313.68, + Plans: [ + { + 'Plan Width': 0, + 'Node Type': 'Bitmap Index Scan', + 'Parallel Aware': false, + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Startup Cost': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + 'Parent Relationship': 'Outer', + 'Total Cost': 0, + 'Plan Rows': 94 + } + ], + 'Node Type': 'Bitmap Heap Scan', + 'Parallel Aware': false + } + ], + 'Node Type': 'Sort', + 'Plan Rows': 11797494, + 'Plan Width': 539 + } + ], + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Total Cost': 10390876.29 + } + ], + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + Alias: 'agg_stage_2', + 'Startup Cost': 10036951.47, + 'Total Cost': 10479357.5, + 
'Plan Rows': 11797494 + } + ], + 'Node Type': 'Sort', + 'Startup Cost': 10631837.9, + 'Plan Width': 64 + } + ], + 'Node Type': 'Limit', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 10631837.9, + 'Total Cost': 10631837.9, + 'Plan Rows': 3, + 'Plan Width': 64 + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': 'app.bsky.feed.post' + } + }, + { + '$project': { + _id: 0, + user_id: '$did', + timestamp: { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + }, + { + '$group': { + _id: '$user_id', + first_post_ts: { + '$min': '$timestamp' + } + } + }, + { + '$sort': { + first_post_ts: 1 + } + }, + { + '$limit': 3 + } + ], + cursor: {}, + '$db': 'bluesky_100m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q5: + +{ + queryPlanner: { + JIT: { + Functions: 11, + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + } + }, + Plan: { + 'Async Capable': false, + 'Startup Cost': 10808800.31, + 'Total Cost': 10808800.31, + 'Plan Rows': 3, + 'Plan Width': 64, + Plans: [ + { + 'Startup Cost': 10808800.31, + 'Total Cost': 10838294.04, + 'Plan Rows': 11797494, + 'Plan Width': 64, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_api_catalog.bson_dollar_project(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4, agg_stage_2.c5, agg_stage_2.c6), 
'BSONHEX790000000361637469766974795f7370616e006500000003246461746544696666005500000002737461727444617465000f000000246d696e5f74696d657374616d700002656e6444617465000f000000246d61785f74696d657374616d700002756e6974000c0000006d696c6c697365636f6e6400000000'::documentdb_core.bson), 'BSONHEX180000001061637469766974795f7370616e00ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + Plans: [ + { + 'Async Capable': false, + 'Total Cost': 10656319.91, + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + Alias: 'agg_stage_2', + 'Startup Cost': 10036951.47, + 'Plan Rows': 11797494, + 'Plan Width': 64, + Plans: [ + { + 'Total Cost': 10508851.23, + 'Plan Width': 192, + Strategy: 'Sorted', + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Plan Rows': 11797494, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + Plans: [ + { + 'Startup Cost': 10036951.47, + 'Total Cost': 10066445.21, + 'Plan Rows': 11797494, + Plans: [ + { + 'Node Type': 'Bitmap Heap Scan', + 'Parent Relationship': 'Outer', + 'Async Capable': false, + Alias: 'collection', + 'Startup Cost': 2949.37, + 'Total Cost': 62313.68, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 
'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + Plans: [ + { + 'Node Type': 'Bitmap Index Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Startup Cost': 0, + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Total Cost': 0, + 'Plan Rows': 94, + 'Plan Width': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))" + } + ], + 'Parallel Aware': false, + 'Relation Name': 'documents_16', + 'Plan Rows': 11797494, + 'Plan Width': 539 + } + ], + 'Async Capable': false, + 'Plan Width': 539, + 'Sort Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false + } + ], + 'Node Type': 'Aggregate', + 'Async Capable': false, + 'Startup Cost': 10036951.47 + } + ], + 'Parallel Aware': false + } + ], + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false + } + ], + 'Node Type': 'Limit', + 'Parallel Aware': false + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + 
'$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': 'app.bsky.feed.post' + } + }, + { + '$project': { + _id: 0, + user_id: '$did', + timestamp: { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + }, + { + '$group': { + _id: '$user_id', + min_timestamp: { + '$min': '$timestamp' + }, + max_timestamp: { + '$max': '$timestamp' + } + } + }, + { + '$project': { + activity_span: { + '$dateDiff': { + startDate: '$min_timestamp', + endDate: '$max_timestamp', + unit: 'millisecond' + } + } + } + }, + { + '$sort': { + activity_span: -1 + } + }, + { + '$limit': 3 + } + ], + cursor: {}, + '$db': 'bluesky_100m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} diff --git a/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_100m_zstd.index_usage b/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_100m_zstd.index_usage new file mode 100644 index 0000000..0c6aaa5 --- /dev/null +++ b/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_100m_zstd.index_usage @@ -0,0 +1,773 @@ +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q1: + +{ + queryPlanner: { + Plan: { + 'Plan Width': 64, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_0.c1, agg_stage_0.c2, agg_stage_0.c3, agg_stage_0.c4), 'BSONHEX1000000010636f756e7400ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + Plans: [ + { + 'Async Capable': false, + Alias: 'agg_stage_0', + 'Startup Cost': 22836829.82, + 'Total Cost': 27541039.78, + 'Plan Rows': 94379134, + Plans: [ + { + 'Async Capable': false, + 'Startup Cost': 22836829.82, + 'Total Cost': 26833196.28, + 'Plan Rows': 94379134, + 'Plan Width': 128, + 'Group Key': [ + 
"documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true)" + ], + 'Planned Partitions': 256, + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Relation Name': 'documents_18', + Alias: 'collection', + 'Plan Rows': 94379134, + 'Node Type': 'Seq Scan', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 0, + 'Total Cost': 8267051.01, + 'Plan Width': 32 + } + ], + 'Node Type': 'Aggregate', + Strategy: 'Hashed', + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false + } + ], + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Plan Width': 64, + 'Parallel Aware': false + } + ], + 'Node Type': 'Sort', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 50687792.67, + 'Total Cost': 50923740.5, + 'Plan Rows': 94379134 + }, + JIT: { + Functions: 5, + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$group': { + _id: '$commit.collection', + count: { + '$sum': 1 + } + } + }, + { + '$sort': { + count: -1 + } + } + ], + cursor: {}, + '$db': 'bluesky_100m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q2: + +{ + queryPlanner: { + JIT: { + Functions: 8, + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + } + }, + Plan: { + 'Node Type': 'Sort', + 'Async Capable': false, + 'Plan Rows': 23594784, + 'Plan Width': 64, + 'Sort Key': [ + 
"(documentdb_api_catalog.bson_orderby(documentdb_api_catalog.bson_dollar_project(documentdb_core.bson_repath_and_build(agg_stage_1.c1, agg_stage_1.c2, agg_stage_1.c3, agg_stage_1.c4, agg_stage_1.c5, agg_stage_1.c6), 'BSONHEX3e000000026576656e740005000000245f69640010636f756e7400010000000375736572730017000000022473697a650007000000247573657273000000'::documentdb_core.bson), 'BSONHEX1000000010636f756e7400ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + 'Parallel Aware': false, + 'Startup Cost': 17609952.56, + 'Total Cost': 17668939.52, + Plans: [ + { + 'Startup Cost': 8367416.71, + 'Plan Rows': 23594784, + Plans: [ + { + 'Parent Relationship': 'Subquery', + 'Startup Cost': 8367416.71, + 'Total Cost': 11764274.69, + 'Plan Width': 192, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true))" + ], + Plans: [ + { + 'Total Cost': 11115418.13, + 'Plan Rows': 23594784, + 'Plan Width': 541, + 'Workers Planned': 2, + 'Async Capable': false, + 'Startup Cost': 8367416.71, + Plans: [ + { + Plans: [ + { + Alias: 'collection', + 'Total Cost': 67223.54, + 'Plan Rows': 9831160, + 'Plan Width': 541, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Heap Scan', + 'Parallel Aware': true, + 'Async Capable': false, + 'Relation Name': 'documents_18', + 'Startup Cost': 5898.7, + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 
'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Index Scan', + 'Startup Cost': 0, + 'Total Cost': 0, + 'Plan Rows': 9438, + 'Plan Width': 0 + } + ], + 'Parent Relationship': 'Outer' + } + ], + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Sort Key': [ + "(documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true))" + ], + 'Startup Cost': 8366416.69, + 'Total Cost': 8390994.59, + 'Plan Rows': 9831160, + 'Plan Width': 541 + } + ], + 'Node Type': 'Gather Merge', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false + } + ], + 'Node Type': 'Aggregate', + 'Partial Mode': 'Simple', + 'Parallel Aware': false, + 'Async Capable': false, + 'Plan Rows': 23594784, + Strategy: 'Sorted' + } + ], + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + Alias: 'agg_stage_1', + 'Total Cost': 12059209.49, + 'Plan Width': 64, + 'Async Capable': false + } + ] + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create' + } + }, + { + '$group': { + _id: '$commit.collection', + count: { + '$sum': 1 + }, + users: { + '$addToSet': '$did' + } + } + }, + { + '$project': { + event: '$_id', + count: 1, + users: { + '$size': '$users' + } + } + }, + { + '$sort': { + count: -1 + } + } + ], + cursor: {}, + '$db': 'bluesky_100m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} 
+------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q3: + +{ + queryPlanner: { + Plan: { + Plans: [ + { + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + Alias: 'agg_stage_2', + 'Plan Rows': 7864927, + 'Plan Width': 96, + Plans: [ + { + 'Node Type': 'Aggregate', + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 668404.07, + 'Plan Width': 128, + 'Group Key': [ + "documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX7d000000105f69640000000000026576656e74001300000024636f6d6d69742e636f6c6c656374696f6e0003686f75725f6f665f64617900440000000324686f757200380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e80300000000000000'::documentdb_core.bson), 'BSONHEX3c000000030035000000026576656e740007000000246576656e740002686f75725f6f665f646179000d00000024686f75725f6f665f646179000000'::documentdb_core.bson, true)" + ], + 'Planned Partitions': 256, + Strategy: 'Hashed', + 'Partial Mode': 'Simple', + 'Total Cost': 913568.59, + 'Plan Rows': 7864927, + Plans: [ + { + 'Parallel Aware': false, + 'Startup Cost': 1966.23, + 'Total Cost': 41667.7, + 'Plan Rows': 7864927, + 'Plan Width': 32, + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Total Cost': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@*=) 
'BSONHEX6d00000004636f6d6d69742e636f6c6c656374696f6e0055000000023000130000006170702e62736b792e666565642e706f737400023100150000006170702e62736b792e666565642e7265706f737400023200130000006170702e62736b792e666565642e6c696b65000000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Index Scan', + 'Parallel Aware': false, + 'Startup Cost': 0, + 'Plan Rows': 94, + 'Plan Width': 0 + } + ], + 'Async Capable': false, + 'Relation Name': 'documents_18', + Alias: 'collection', + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@*=) 'BSONHEX6d00000004636f6d6d69742e636f6c6c656374696f6e0055000000023000130000006170702e62736b792e666565642e706f737400023100150000006170702e62736b792e666565642e7265706f737400023200130000006170702e62736b792e666565642e6c696b65000000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Heap Scan', + 'Parent Relationship': 'Outer' + } + ] + } + ], + 'Startup Cost': 668404.07, + 'Total Cost': 1011880.18 + } + ], + 'Node Type': 'Sort', + 'Startup Cost': 3122384.17, + 'Total Cost': 3142046.49, + 'Plan Rows': 7864927, + 'Parallel Aware': false, + 'Async Capable': false, + 'Plan Width': 96, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX1a000000105f69642e686f75725f6f665f646179000100000000'::documentdb_core.bson)) NULLS FIRST", + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX14000000105f69642e6576656e74000100000000'::documentdb_core.bson)) NULLS FIRST" + ] + }, + JIT: { + Functions: 7, + Options: { + Deforming: true, + Inlining: true, + Optimization: true, + 
Expressions: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': { + '$in': [ + 'app.bsky.feed.post', + 'app.bsky.feed.repost', + 'app.bsky.feed.like' + ] + } + } + }, + { + '$project': { + _id: 0, + event: '$commit.collection', + hour_of_day: { + '$hour': { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + } + }, + { + '$group': { + _id: { + event: '$event', + hour_of_day: '$hour_of_day' + }, + count: { + '$sum': 1 + } + } + }, + { + '$sort': { + '_id.hour_of_day': 1, + '_id.event': 1 + } + } + ], + cursor: {}, + '$db': 'bluesky_100m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q4: + +{ + queryPlanner: { + Plan: { + 'Async Capable': false, + 'Startup Cost': 10631746, + 'Total Cost': 10631746.01, + 'Plan Rows': 3, + 'Plan Width': 64, + Plans: [ + { + 'Async Capable': false, + 'Startup Cost': 10631746, + 'Total Cost': 10661239.48, + 'Plan Width': 64, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX180000001066697273745f706f73745f7473000100000000'::documentdb_core.bson)) NULLS FIRST" + ], + Plans: [ + { + 'Async Capable': false, + 'Startup Cost': 10036864.72, + Plans: [ + { + 'Startup Cost': 10036864.72, + 'Plan Rows': 11797392, + 'Node Type': 'Aggregate', + Strategy: 'Sorted', + 'Parallel Aware': false, + 'Total Cost': 10390786.48, + 'Plan Width': 128, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 
'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + Plans: [ + { + 'Total Cost': 10066358.2, + Plans: [ + { + Alias: 'collection', + 'Startup Cost': 2949.35, + 'Total Cost': 62313.14, + 'Node Type': 'Bitmap Heap Scan', + 'Parallel Aware': false, + 'Relation Name': 'documents_18', + 'Plan Rows': 11797392, + 'Plan Width': 541, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + Plans: [ + { + 'Node Type': 'Bitmap Index Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Total Cost': 0, + 'Plan Rows': 94, + 'Plan Width': 0 + } + ], + 'Parent Relationship': 'Outer', + 'Async Capable': false + } + ], + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', + 'Startup Cost': 
10036864.72, + 'Plan Rows': 11797392, + 'Plan Width': 541, + 'Sort Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + 'Parallel Aware': false, + 'Async Capable': false + } + ], + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Async Capable': false + } + ], + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + Alias: 'agg_stage_2', + 'Total Cost': 10479266.92, + 'Plan Rows': 11797392, + 'Plan Width': 64 + } + ], + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Plan Rows': 11797392 + } + ], + 'Node Type': 'Limit', + 'Parallel Aware': false + }, + JIT: { + Functions: 11, + Options: { + Expressions: true, + Deforming: true, + Inlining: true, + Optimization: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': 'app.bsky.feed.post' + } + }, + { + '$project': { + _id: 0, + user_id: '$did', + timestamp: { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + }, + { + '$group': { + _id: '$user_id', + first_post_ts: { + '$min': '$timestamp' + } + } + }, + { + '$sort': { + first_post_ts: 1 + } + }, + { + '$limit': 3 + } + ], + cursor: {}, + '$db': 'bluesky_100m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} 
+------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q5: + +{ + queryPlanner: { + Plan: { + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 10808706.88, + 'Total Cost': 10808706.89, + 'Plan Rows': 3, + 'Plan Width': 64, + Plans: [ + { + 'Node Type': 'Sort', + 'Parallel Aware': false, + 'Plan Width': 64, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_api_catalog.bson_dollar_project(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4, agg_stage_2.c5, agg_stage_2.c6), 'BSONHEX790000000361637469766974795f7370616e006500000003246461746544696666005500000002737461727444617465000f000000246d696e5f74696d657374616d700002656e6444617465000f000000246d61785f74696d657374616d700002756e6974000c0000006d696c6c697365636f6e6400000000'::documentdb_core.bson), 'BSONHEX180000001061637469766974795f7370616e00ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + Plans: [ + { + 'Parallel Aware': false, + Alias: 'agg_stage_2', + 'Startup Cost': 10036864.72, + 'Total Cost': 10656227.8, + 'Async Capable': false, + 'Plan Rows': 11797392, + 'Plan Width': 64, + Plans: [ + { + Plans: [ + { + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', + 'Plan Width': 541, + 'Sort Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + Plans: [ + { + 'Plan Rows': 11797392, + 'Plan Width': 541, + 'Parallel Aware': false, + Alias: 'collection', + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 
'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + Plans: [ + { + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Startup Cost': 0, + 'Node Type': 'Bitmap Index Scan', + 'Parent Relationship': 'Outer', + 'Total Cost': 0, + 'Plan Rows': 94, + 'Plan Width': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + 'Parallel Aware': false + } + ], + 'Node Type': 'Bitmap Heap Scan', + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Relation Name': 'documents_18', + 'Startup Cost': 2949.35, + 'Total Cost': 62313.14 + } + ], + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 10036864.72, + 'Total Cost': 10066358.2, + 'Plan Rows': 11797392 + } + ], + 'Node Type': 'Aggregate', + Strategy: 'Sorted', + 'Partial Mode': 'Simple', + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 10508760.4, + 'Plan Rows': 11797392, + 'Plan Width': 192, + 'Parent Relationship': 'Subquery', + 'Startup Cost': 10036864.72, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 
'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ] + } + ], + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer' + } + ], + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Startup Cost': 10808706.88, + 'Total Cost': 10838200.36, + 'Plan Rows': 11797392 + } + ], + 'Node Type': 'Limit' + }, + JIT: { + Functions: 11, + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': 'app.bsky.feed.post' + } + }, + { + '$project': { + _id: 0, + user_id: '$did', + timestamp: { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + }, + { + '$group': { + _id: '$user_id', + min_timestamp: { + '$min': '$timestamp' + }, + max_timestamp: { + '$max': '$timestamp' + } + } + }, + { + '$project': { + activity_span: { + '$dateDiff': { + startDate: '$min_timestamp', + endDate: '$max_timestamp', + unit: 'millisecond' + } + } + } + }, + { + '$sort': { + activity_span: -1 + } + }, + { + '$limit': 3 + } + ], + cursor: {}, + '$db': 'bluesky_100m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} diff --git a/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_10m_snappy.index_usage b/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_10m_snappy.index_usage new file mode 100644 index 0000000..91a7b33 --- /dev/null +++ b/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_10m_snappy.index_usage @@ -0,0 +1,746 @@ 
+------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q1: + +{ + queryPlanner: { + Plan: { + 'Node Type': 'Sort', + 'Async Capable': false, + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Startup Cost': 1322060.54, + 'Total Cost': 1606275.52, + 'Plan Width': 64, + 'Async Capable': false, + Alias: 'agg_stage_0', + 'Plan Rows': 7857347, + Plans: [ + { + Plans: [ + { + 'Parallel Aware': false, + Alias: 'collection', + 'Plan Width': 32, + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Relation Name': 'documents_12', + 'Startup Cost': 0, + 'Total Cost': 695928.21, + 'Plan Rows': 7857347, + 'Node Type': 'Seq Scan' + } + ], + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Startup Cost': 1322060.54, + 'Total Cost': 1547345.42, + 'Plan Rows': 7857347, + 'Planned Partitions': 256, + 'Node Type': 'Aggregate', + Strategy: 'Hashed', + 'Partial Mode': 'Simple', + 'Async Capable': false, + 'Plan Width': 128, + 'Group Key': [ + "documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true)" + ] + } + ], + 'Node Type': 'Subquery Scan' + } + ], + 'Parallel Aware': false, + 'Startup Cost': 3097004.18, + 'Total Cost': 3116647.55, + 'Plan Rows': 7857347, + 'Plan Width': 64, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_0.c1, agg_stage_0.c2, agg_stage_0.c3, agg_stage_0.c4), 'BSONHEX1000000010636f756e7400ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ] + }, + JIT: { + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + }, + Functions: 5 + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$group': { + _id: '$commit.collection', + count: { + '$sum': 1 + } + } + }, + { + '$sort': { + count: -1 + } + 
} + ], + cursor: {}, + '$db': 'bluesky_10m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q2: + +{ + queryPlanner: { + Plan: { + 'Async Capable': false, + 'Startup Cost': 1860549.73, + 'Total Cost': 1865460.57, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_api_catalog.bson_dollar_project(documentdb_core.bson_repath_and_build(agg_stage_1.c1, agg_stage_1.c2, agg_stage_1.c3, agg_stage_1.c4, agg_stage_1.c5, agg_stage_1.c6), 'BSONHEX3e000000026576656e740005000000245f69640010636f756e7400010000000375736572730017000000022473697a650007000000247573657273000000'::documentdb_core.bson), 'BSONHEX1000000010636f756e7400ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + 'Node Type': 'Sort', + 'Parallel Aware': false, + 'Plan Rows': 1964337, + 'Plan Width': 64, + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Async Capable': false, + Alias: 'agg_stage_1', + 'Startup Cost': 1172342.41, + 'Total Cost': 1507507.41, + 'Plan Width': 64, + 'Parallel Aware': false, + 'Plan Rows': 1964337, + Plans: [ + { + Strategy: 'Hashed', + 'Partial Mode': 'Simple', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 1172342.41, + 'Plan Rows': 1964337, + 'Planned Partitions': 256, + Plans: [ + { + 'Node Type': 'Bitmap Heap Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Relation Name': 'documents_12', + Alias: 'collection', + 'Startup Cost': 491.08, + 'Total Cost': 8472.73, + 'Plan Width': 547, + 'Async Capable': false, + 'Plan Rows': 1964337, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 
'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson))", + Plans: [ + { + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Plan Rows': 786, + 'Plan Width': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Index Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Startup Cost': 0, + 'Total Cost': 0 + } + ] + } + ], + 'Node Type': 'Aggregate', + 'Parent Relationship': 'Subquery', + 'Total Cost': 1482953.19, + 'Plan Width': 192, + 'Group Key': [ + "documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true)" + ] + } + ], + 'Node Type': 'Subquery Scan' + } + ] + }, + JIT: { + Functions: 7, + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create' + } + }, + { + '$group': { + _id: '$commit.collection', + count: { + '$sum': 1 + }, + users: { + '$addToSet': '$did' + } + } + }, + { + '$project': { + event: '$_id', + count: 1, + users: { + '$size': '$users' + } + } + }, + { + '$sort': { + count: -1 + } + } + ], + cursor: {}, + '$db': 'bluesky_10m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q3: + +{ + 
queryPlanner: { + Plan: { + 'Node Type': 'Sort', + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 216277.51, + 'Plan Rows': 654779, + 'Plan Width': 96, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX1a000000105f69642e686f75725f6f665f646179000100000000'::documentdb_core.bson)) NULLS FIRST", + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX14000000105f69642e6576656e74000100000000'::documentdb_core.bson)) NULLS FIRST" + ], + 'Startup Cost': 214640.56, + Plans: [ + { + 'Total Cost': 84242.79, + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Startup Cost': 55647.36, + 'Plan Rows': 654779, + 'Plan Width': 96, + Plans: [ + { + Strategy: 'Hashed', + 'Partial Mode': 'Simple', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 55647.36, + 'Plan Rows': 654779, + 'Plan Width': 128, + 'Planned Partitions': 32, + 'Node Type': 'Aggregate', + 'Parent Relationship': 'Subquery', + 'Total Cost': 76058.05, + 'Group Key': [ + "documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX7d000000105f69640000000000026576656e74001300000024636f6d6d69742e636f6c6c656374696f6e0003686f75725f6f665f64617900440000000324686f757200380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e80300000000000000'::documentdb_core.bson), 'BSONHEX3c000000030035000000026576656e740007000000246576656e740002686f75725f6f665f646179000d00000024686f75725f6f665f646179000000'::documentdb_core.bson, true)" + ], + Plans: [ + { + 'Node Type': 'Bitmap Heap Scan', + 'Parallel Aware': false, + 'Async Capable': false, + 'Relation Name': 'documents_12', + Alias: 'collection', + 'Total Cost': 3469.66, + 'Plan Width': 32, + 'Recheck Cond': "((document 
OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@*=) 'BSONHEX6d00000004636f6d6d69742e636f6c6c656374696f6e0055000000023000130000006170702e62736b792e666565642e706f737400023100150000006170702e62736b792e666565642e7265706f737400023200130000006170702e62736b792e666565642e6c696b65000000'::documentdb_core.bson))", + 'Parent Relationship': 'Outer', + 'Startup Cost': 163.69, + 'Plan Rows': 654779, + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Startup Cost': 0, + 'Plan Rows': 8, + 'Plan Width': 0, + 'Node Type': 'Bitmap Index Scan', + 'Async Capable': false, + 'Total Cost': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@*=) 'BSONHEX6d00000004636f6d6d69742e636f6c6c656374696f6e0055000000023000130000006170702e62736b792e666565642e706f737400023100150000006170702e62736b792e666565642e7265706f737400023200130000006170702e62736b792e666565642e6c696b65000000'::documentdb_core.bson))" + } + ] + } + ] + } + ], + 'Parallel Aware': false, + 'Async Capable': false, + Alias: 'agg_stage_2' + } + ] + }, + JIT: { + Functions: 7, + Options: { + Inlining: false, + Optimization: false, + Expressions: true, + Deforming: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': { + '$in': [ + 'app.bsky.feed.post', + 
'app.bsky.feed.repost', + 'app.bsky.feed.like' + ] + } + } + }, + { + '$project': { + _id: 0, + event: '$commit.collection', + hour_of_day: { + '$hour': { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + } + }, + { + '$group': { + _id: { + event: '$event', + hour_of_day: '$hour_of_day' + }, + count: { + '$sum': 1 + } + } + }, + { + '$sort': { + '_id.hour_of_day': 1, + '_id.event': 1 + } + } + ], + cursor: {}, + '$db': 'bluesky_10m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q4: + +{ + queryPlanner: { + Plan: { + 'Plan Rows': 3, + 'Plan Width': 64, + Plans: [ + { + 'Startup Cost': 635880.36, + 'Total Cost': 638335.78, + 'Plan Rows': 982168, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX180000001066697273745f706f73745f7473000100000000'::documentdb_core.bson)) NULLS FIRST" + ], + Plans: [ + { + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 586354.72, + 'Total Cost': 623186.02, + 'Plan Rows': 982168, + 'Plan Width': 64, + Plans: [ + { + Strategy: 'Sorted', + 'Startup Cost': 586354.72, + 'Total Cost': 615819.76, + 'Plan Rows': 982168, + 'Plan Width': 128, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + 'Node Type': 
'Aggregate', + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Async Capable': false, + Plans: [ + { + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Plan Rows': 982168, + Plans: [ + { + 'Plan Rows': 982168, + Plans: [ + { + 'Node Type': 'Bitmap Index Scan', + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Startup Cost': 0, + 'Plan Rows': 8, + 'Parallel Aware': false, + 'Total Cost': 0, + 'Plan Width': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))" + } + ], + 'Node Type': 'Bitmap Heap Scan', + 'Parent Relationship': 'Outer', + 'Startup Cost': 245.54, + 'Plan Width': 547, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + 'Parallel Aware': false, + 'Async Capable': false, + 'Relation Name': 'documents_12', + Alias: 'collection', + 'Total Cost': 5188.45 + } + ], + 'Parallel Aware': false, + 'Startup Cost': 586354.72, + 'Total Cost': 588810.14, + 'Plan Width': 547, + 'Sort Key': [ + 
"(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ] + } + ] + } + ], + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + Alias: 'agg_stage_2' + } + ], + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Plan Width': 64, + 'Node Type': 'Sort', + 'Async Capable': false + } + ], + 'Node Type': 'Limit', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 635880.36, + 'Total Cost': 635880.36 + }, + JIT: { + Functions: 11, + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': 'app.bsky.feed.post' + } + }, + { + '$project': { + _id: 0, + user_id: '$did', + timestamp: { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + }, + { + '$group': { + _id: '$user_id', + first_post_ts: { + '$min': '$timestamp' + } + } + }, + { + '$sort': { + first_post_ts: 1 + } + }, + { + '$limit': 3 + } + ], + cursor: {}, + '$db': 'bluesky_10m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q5: + +{ + queryPlanner: { + Plan: { + 'Plan Width': 64, + Plans: [ + { + 'Sort Key': [ + 
"(documentdb_api_catalog.bson_orderby(documentdb_api_catalog.bson_dollar_project(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4, agg_stage_2.c5, agg_stage_2.c6), 'BSONHEX790000000361637469766974795f7370616e006500000003246461746544696666005500000002737461727444617465000f000000246d696e5f74696d657374616d700002656e6444617465000f000000246d61785f74696d657374616d700002756e6974000c0000006d696c6c697365636f6e6400000000'::documentdb_core.bson), 'BSONHEX180000001061637469766974795f7370616e00ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + 'Node Type': 'Sort', + 'Async Capable': false, + 'Plan Rows': 982168, + 'Plan Width': 64, + Plans: [ + { + 'Total Cost': 637918.54, + 'Plan Rows': 982168, + Plans: [ + { + Strategy: 'Sorted', + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 625641.44, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + 'Node Type': 'Aggregate', + 'Startup Cost': 586354.72, + 'Plan Rows': 982168, + 'Plan Width': 192, + Plans: [ + { + 'Sort Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', 
+ 'Async Capable': false, + 'Startup Cost': 586354.72, + 'Total Cost': 588810.14, + 'Plan Width': 547, + Plans: [ + { + 'Async Capable': false, + 'Relation Name': 'documents_12', + Alias: 'collection', + 'Startup Cost': 245.54, + 'Plan Rows': 982168, + 'Plan Width': 547, + Plans: [ + { + 'Node Type': 'Bitmap Index Scan', + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Startup Cost': 0, + 'Total Cost': 0, + 'Parallel Aware': false, + 'Plan Rows': 8, + 'Plan Width': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))" + } + ], + 'Node Type': 'Bitmap Heap Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Total Cost': 5188.45, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))" + } + ], + 'Parallel Aware': false, + 'Plan Rows': 982168 + } + ] + } + ], + 'Node Type': 'Subquery Scan', + 'Parallel Aware': false, + 'Plan Width': 64, + 'Parent Relationship': 'Outer', + 'Async Capable': false, + Alias: 'agg_stage_2', + 'Startup Cost': 586354.72 + } + ], + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Startup Cost': 
650612.88, + 'Total Cost': 653068.3 + } + ], + 'Node Type': 'Limit', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 650612.88, + 'Total Cost': 650612.88, + 'Plan Rows': 3 + }, + JIT: { + Options: { + Deforming: true, + Inlining: true, + Optimization: true, + Expressions: true + }, + Functions: 11 + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': 'app.bsky.feed.post' + } + }, + { + '$project': { + _id: 0, + user_id: '$did', + timestamp: { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + }, + { + '$group': { + _id: '$user_id', + min_timestamp: { + '$min': '$timestamp' + }, + max_timestamp: { + '$max': '$timestamp' + } + } + }, + { + '$project': { + activity_span: { + '$dateDiff': { + startDate: '$min_timestamp', + endDate: '$max_timestamp', + unit: 'millisecond' + } + } + } + }, + { + '$sort': { + activity_span: -1 + } + }, + { + '$limit': 3 + } + ], + cursor: {}, + '$db': 'bluesky_10m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} diff --git a/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_10m_zstd.index_usage b/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_10m_zstd.index_usage new file mode 100644 index 0000000..12abf0e --- /dev/null +++ b/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_10m_zstd.index_usage @@ -0,0 +1,746 @@ +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q1: + +{ + queryPlanner: { + Plan: { + 'Async Capable': false, + 'Startup Cost': 3098726.54, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_0.c1, agg_stage_0.c2, agg_stage_0.c3, agg_stage_0.c4), 
'BSONHEX1000000010636f756e7400ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + 'Node Type': 'Sort', + 'Total Cost': 3118382.97, + 'Plan Rows': 7862574, + 'Plan Width': 64, + Plans: [ + { + 'Plan Width': 64, + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Total Cost': 1606969.52, + 'Plan Rows': 7862574, + Plans: [ + { + 'Async Capable': false, + 'Startup Cost': 1322565.48, + 'Total Cost': 1548000.21, + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Relation Name': 'documents_14', + Alias: 'collection', + 'Total Cost': 696016.61, + 'Plan Width': 32, + 'Node Type': 'Seq Scan', + 'Parallel Aware': false, + 'Startup Cost': 0, + 'Plan Rows': 7862574 + } + ], + 'Node Type': 'Aggregate', + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Plan Rows': 7862574, + 'Plan Width': 128, + 'Group Key': [ + "documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true)" + ], + 'Planned Partitions': 256, + Strategy: 'Hashed' + } + ], + 'Parallel Aware': false, + Alias: 'agg_stage_0', + 'Startup Cost': 1322565.48 + } + ], + 'Parallel Aware': false + }, + JIT: { + Functions: 5, + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$group': { + _id: '$commit.collection', + count: { + '$sum': 1 + } + } + }, + { + '$sort': { + count: -1 + } + } + ], + cursor: {}, + '$db': 'bluesky_10m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q2: + +{ 
+ queryPlanner: { + Plan: { + 'Node Type': 'Sort', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 1861794.78, + 'Plan Rows': 1965644, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_api_catalog.bson_dollar_project(documentdb_core.bson_repath_and_build(agg_stage_1.c1, agg_stage_1.c2, agg_stage_1.c3, agg_stage_1.c4, agg_stage_1.c5, agg_stage_1.c6), 'BSONHEX3e000000026576656e740005000000245f69640010636f756e7400010000000375736572730017000000022473697a650007000000247573657273000000'::documentdb_core.bson), 'BSONHEX1000000010636f756e7400ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + Plans: [ + { + 'Async Capable': false, + Alias: 'agg_stage_1', + 'Startup Cost': 1173120.4, + Plans: [ + { + Strategy: 'Hashed', + 'Partial Mode': 'Simple', + 'Async Capable': false, + 'Plan Rows': 1965644, + 'Plan Width': 192, + 'Group Key': [ + "documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true)" + ], + 'Node Type': 'Aggregate', + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Startup Cost': 1173120.4, + 'Total Cost': 1483937.86, + 'Planned Partitions': 256, + Plans: [ + { + Alias: 'collection', + 'Startup Cost': 491.41, + 'Plan Width': 546, + Plans: [ + { + 'Parallel Aware': false, + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Plan Rows': 786, + 'Plan Width': 0, + 'Startup Cost': 0, + 'Total Cost': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Index Scan', + 'Parent Relationship': 'Outer' + } + ], + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 8476.33, 
+ 'Plan Rows': 1965644, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Heap Scan', + 'Parent Relationship': 'Outer', + 'Relation Name': 'documents_14' + } + ] + } + ], + 'Total Cost': 1508508.41, + 'Plan Rows': 1965644, + 'Plan Width': 64, + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false + } + ], + 'Total Cost': 1866708.89, + 'Plan Width': 64 + }, + JIT: { + Functions: 7, + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create' + } + }, + { + '$group': { + _id: '$commit.collection', + count: { + '$sum': 1 + }, + users: { + '$addToSet': '$did' + } + } + }, + { + '$project': { + event: '$_id', + count: 1, + users: { + '$size': '$users' + } + } + }, + { + '$sort': { + count: -1 + } + } + ], + cursor: {}, + '$db': 'bluesky_10m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q3: + +{ + queryPlanner: { + Plan: { + 'Parallel Aware': false, + 'Total Cost': 216421.7, + 'Plan Rows': 655214, + 'Plan Width': 96, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX1a000000105f69642e686f75725f6f665f646179000100000000'::documentdb_core.bson)) NULLS FIRST", + 
"(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX14000000105f69642e6576656e74000100000000'::documentdb_core.bson)) NULLS FIRST" + ], + 'Async Capable': false, + 'Startup Cost': 214783.67, + Plans: [ + { + 'Total Cost': 84298.73, + 'Plan Width': 96, + Plans: [ + { + 'Node Type': 'Aggregate', + Strategy: 'Hashed', + 'Partial Mode': 'Simple', + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 76108.56, + 'Plan Rows': 655214, + 'Group Key': [ + "documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX7d000000105f69640000000000026576656e74001300000024636f6d6d69742e636f6c6c656374696f6e0003686f75725f6f665f64617900440000000324686f757200380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e80300000000000000'::documentdb_core.bson), 'BSONHEX3c000000030035000000026576656e740007000000246576656e740002686f75725f6f665f646179000d00000024686f75725f6f665f646179000000'::documentdb_core.bson, true)" + ], + 'Parent Relationship': 'Subquery', + 'Startup Cost': 55684.31, + 'Plan Width': 128, + 'Planned Partitions': 32, + Plans: [ + { + 'Node Type': 'Bitmap Heap Scan', + 'Parallel Aware': false, + 'Async Capable': false, + Alias: 'collection', + 'Parent Relationship': 'Outer', + 'Relation Name': 'documents_14', + 'Startup Cost': 163.8, + 'Total Cost': 3471.94, + 'Plan Rows': 655214, + 'Plan Width': 32, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@*=) 
'BSONHEX6d00000004636f6d6d69742e636f6c6c656374696f6e0055000000023000130000006170702e62736b792e666565642e706f737400023100150000006170702e62736b792e666565642e7265706f737400023200130000006170702e62736b792e666565642e6c696b65000000'::documentdb_core.bson))", + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@*=) 'BSONHEX6d00000004636f6d6d69742e636f6c6c656374696f6e0055000000023000130000006170702e62736b792e666565642e706f737400023100150000006170702e62736b792e666565642e7265706f737400023200130000006170702e62736b792e666565642e6c696b65000000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Index Scan', + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Total Cost': 0, + 'Plan Rows': 8, + 'Plan Width': 0 + } + ] + } + ] + } + ], + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + Alias: 'agg_stage_2', + 'Plan Rows': 655214, + 'Startup Cost': 55684.31 + } + ], + 'Node Type': 'Sort' + }, + JIT: { + Options: { + Inlining: false, + Optimization: false, + Expressions: true, + Deforming: true + }, + Functions: 7 + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': { + '$in': [ + 'app.bsky.feed.post', + 'app.bsky.feed.repost', + 'app.bsky.feed.like' + ] + } + } + }, + { + '$project': { + _id: 0, + event: '$commit.collection', + hour_of_day: { + '$hour': { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + } + }, + { + '$group': { + _id: { + 
event: '$event', + hour_of_day: '$hour_of_day' + }, + count: { + '$sum': 1 + } + } + }, + { + '$sort': { + '_id.hour_of_day': 1, + '_id.event': 1 + } + } + ], + cursor: {}, + '$db': 'bluesky_10m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q4: + +{ + queryPlanner: { + Plan: { + 'Total Cost': 636308.59, + 'Plan Rows': 3, + 'Plan Width': 64, + Plans: [ + { + 'Async Capable': false, + 'Total Cost': 638765.63, + 'Plan Rows': 982822, + 'Parallel Aware': false, + 'Startup Cost': 636308.58, + 'Plan Width': 64, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX180000001066697273745f706f73745f7473000100000000'::documentdb_core.bson)) NULLS FIRST" + ], + Plans: [ + { + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Startup Cost': 586749.96, + 'Total Cost': 623605.79, + Plans: [ + { + Plans: [ + { + 'Plan Rows': 982822, + 'Total Cost': 589207.02, + 'Plan Width': 546, + 'Sort Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + Plans: [ + { + 'Total Cost': 5191.89, + 'Plan Rows': 982822, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document 
OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + Plans: [ + { + 'Node Type': 'Bitmap Index Scan', + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Total Cost': 0, + 'Plan Rows': 8, + 'Plan Width': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + 'Parallel Aware': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Startup Cost': 0 + } + ], + 'Node Type': 'Bitmap Heap Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Relation Name': 'documents_14', + Alias: 'collection', + 'Startup Cost': 245.71, + 'Plan Width': 546 + } + ], + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 586749.96 + } + ], + 'Node Type': 'Aggregate', + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Plan Rows': 982822, + Strategy: 'Sorted', + 'Async Capable': false, + 'Startup Cost': 586749.96, + 'Total Cost': 616234.62, + 'Plan Width': 128, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 
'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ] + } + ], + 'Async Capable': false, + Alias: 'agg_stage_2', + 'Plan Rows': 982822, + 'Plan Width': 64 + } + ], + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer' + } + ], + 'Node Type': 'Limit', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 636308.58 + }, + JIT: { + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + }, + Functions: 11 + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': 'app.bsky.feed.post' + } + }, + { + '$project': { + _id: 0, + user_id: '$did', + timestamp: { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + }, + { + '$group': { + _id: '$user_id', + first_post_ts: { + '$min': '$timestamp' + } + } + }, + { + '$sort': { + first_post_ts: 1 + } + }, + { + '$limit': 3 + } + ], + cursor: {}, + '$db': 'bluesky_10m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q5: + +{ + queryPlanner: { + Plan: { + 'Startup Cost': 651050.91, + 'Total Cost': 651050.92, + 'Plan Rows': 3, + 'Plan Width': 64, + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Plan Rows': 982822, + 'Plan Width': 64, + 'Sort Key': [ + 
"(documentdb_api_catalog.bson_orderby(documentdb_api_catalog.bson_dollar_project(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4, agg_stage_2.c5, agg_stage_2.c6), 'BSONHEX790000000361637469766974795f7370616e006500000003246461746544696666005500000002737461727444617465000f000000246d696e5f74696d657374616d700002656e6444617465000f000000246d61785f74696d657374616d700002756e6974000c0000006d696c6c697365636f6e6400000000'::documentdb_core.bson), 'BSONHEX180000001061637469766974795f7370616e00ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + 'Node Type': 'Sort', + 'Startup Cost': 651050.91, + 'Total Cost': 653507.96, + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + Alias: 'agg_stage_2', + 'Startup Cost': 586749.96, + 'Total Cost': 638348.12, + 'Plan Rows': 982822, + 'Plan Width': 64, + 'Node Type': 'Subquery Scan', + Plans: [ + { + 'Startup Cost': 586749.96, + 'Total Cost': 626062.84, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + 'Node Type': 'Aggregate', + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Async Capable': false, + 'Plan Rows': 982822, + 'Plan Width': 192, + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Startup Cost': 586749.96, + 'Total Cost': 589207.02, + 'Plan Width': 546, + Plans: [ + { + 'Node Type': 'Bitmap Heap Scan', + 'Parent Relationship': 'Outer', + 'Async Capable': false, + Alias: 'collection', + 'Startup Cost': 245.71, + 'Plan Rows': 982822, + 'Plan Width': 546, + 'Recheck Cond': "((document 
OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + 'Parallel Aware': false, + 'Relation Name': 'documents_14', + 'Total Cost': 5191.89, + Plans: [ + { + 'Plan Width': 0, + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Index Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Startup Cost': 0, + 'Total Cost': 0, + 'Plan Rows': 8 + } + ] + } + ], + 'Parallel Aware': false, + 'Async Capable': false, + 'Plan Rows': 982822, + 'Sort Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + 'Node Type': 'Sort' + } + ], + Strategy: 'Sorted' + } + ] + } + ] + } + ], + 'Node Type': 'Limit', + 'Parallel Aware': false, + 'Async Capable': false + }, + 
JIT: { + Functions: 11, + Options: { + Inlining: true, + Optimization: true, + Expressions: true, + Deforming: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': 'app.bsky.feed.post' + } + }, + { + '$project': { + _id: 0, + user_id: '$did', + timestamp: { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + }, + { + '$group': { + _id: '$user_id', + min_timestamp: { + '$min': '$timestamp' + }, + max_timestamp: { + '$max': '$timestamp' + } + } + }, + { + '$project': { + activity_span: { + '$dateDiff': { + startDate: '$min_timestamp', + endDate: '$max_timestamp', + unit: 'millisecond' + } + } + } + }, + { + '$sort': { + activity_span: -1 + } + }, + { + '$limit': 3 + } + ], + cursor: {}, + '$db': 'bluesky_10m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} diff --git a/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_1m_snappy.index_usage b/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_1m_snappy.index_usage new file mode 100644 index 0000000..74f8045 --- /dev/null +++ b/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_1m_snappy.index_usage @@ -0,0 +1,733 @@ +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q1: + +{ + queryPlanner: { + Plan: { + 'Node Type': 'Sort', + 'Startup Cost': 378964.95, + 'Total Cost': 381467.45, + 'Plan Rows': 1001000, + 'Plan Width': 64, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_0.c1, agg_stage_0.c2, agg_stage_0.c3, agg_stage_0.c4), 'BSONHEX1000000010636f756e7400ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + Plans: [ + { + 'Plan Width': 64, + 'Node Type': 'Subquery Scan', + 
'Parallel Aware': false, + Alias: 'agg_stage_0', + Plans: [ + { + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Startup Cost': 167721.19, + 'Plan Rows': 1001000, + 'Group Key': [ + "documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true)" + ], + 'Planned Partitions': 32, + 'Node Type': 'Aggregate', + 'Async Capable': false, + 'Total Cost': 196421.73, + 'Plan Width': 128, + Plans: [ + { + 'Node Type': 'Seq Scan', + 'Parent Relationship': 'Outer', + 'Plan Rows': 1001000, + 'Plan Width': 32, + 'Parallel Aware': false, + 'Async Capable': false, + 'Relation Name': 'documents_8', + Alias: 'collection', + 'Startup Cost': 0, + 'Total Cost': 87954 + } + ], + Strategy: 'Hashed' + } + ], + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Startup Cost': 167721.19, + 'Total Cost': 203929.23, + 'Plan Rows': 1001000 + } + ], + 'Parallel Aware': false, + 'Async Capable': false + }, + JIT: { + Functions: 5, + Options: { + Optimization: false, + Expressions: true, + Deforming: true, + Inlining: false + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$group': { + _id: '$commit.collection', + count: { + '$sum': 1 + } + } + }, + { + '$sort': { + count: -1 + } + } + ], + cursor: {}, + '$db': 'bluesky_1m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q2: + +{ + queryPlanner: { + Plan: { + 'Node Type': 'Sort', + 'Startup Cost': 187467.31, + 'Plan Rows': 250250, + 'Sort Key': [ + 
"(documentdb_api_catalog.bson_orderby(documentdb_api_catalog.bson_dollar_project(documentdb_core.bson_repath_and_build(agg_stage_1.c1, agg_stage_1.c2, agg_stage_1.c3, agg_stage_1.c4, agg_stage_1.c5, agg_stage_1.c6), 'BSONHEX3e000000026576656e740005000000245f69640010636f756e7400010000000375736572730017000000022473697a650007000000247573657273000000'::documentdb_core.bson), 'BSONHEX1000000010636f756e7400ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 188092.94, + 'Plan Width': 64, + Plans: [ + { + 'Startup Cost': 144981.51, + 'Total Cost': 155617.13, + 'Plan Width': 64, + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Plan Rows': 250250, + Plans: [ + { + 'Parent Relationship': 'Subquery', + 'Plan Width': 192, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true))" + ], + Plans: [ + { + 'Startup Cost': 144981.51, + 'Total Cost': 145607.13, + 'Plan Width': 542, + Plans: [ + { + Plans: [ + { + 'Plan Width': 0, + 'Node Type': 'Bitmap Index Scan', + 'Parallel Aware': false, + 'Total Cost': 0, + 'Plan Rows': 100, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson))", + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Startup Cost': 0 + } + ], + 'Node Type': 'Bitmap Heap Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Relation Name': 'documents_8', + 'Startup Cost': 62.56, + 'Plan Width': 542, + Alias: 'collection', + 'Total Cost': 1078.83, + 'Plan Rows': 250250, + 'Recheck 
Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson))" + } + ], + 'Async Capable': false, + 'Plan Rows': 250250, + 'Sort Key': [ + "(documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true))" + ], + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false + } + ], + 'Node Type': 'Aggregate', + Strategy: 'Sorted', + 'Partial Mode': 'Simple', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 144981.51, + 'Total Cost': 152489.01, + 'Plan Rows': 250250 + } + ], + 'Parallel Aware': false, + 'Async Capable': false, + Alias: 'agg_stage_1' + } + ] + }, + JIT: { + Functions: 10, + Options: { + Inlining: false, + Optimization: false, + Expressions: true, + Deforming: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create' + } + }, + { + '$group': { + _id: '$commit.collection', + count: { + '$sum': 1 + }, + users: { + '$addToSet': '$did' + } + } + }, + { + '$project': { + event: '$_id', + count: 1, + users: { + '$size': '$users' + } + } + }, + { + '$sort': { + count: -1 + } + } + ], + cursor: {}, + '$db': 'bluesky_1m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q3: + +{ + queryPlanner: { + Plan: { + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, 
agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX1a000000105f69642e686f75725f6f665f646179000100000000'::documentdb_core.bson)) NULLS FIRST", + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX14000000105f69642e6576656e74000100000000'::documentdb_core.bson)) NULLS FIRST" + ], + Plans: [ + { + 'Plan Rows': 83417, + 'Parallel Aware': false, + 'Async Capable': false, + Alias: 'agg_stage_2', + 'Plan Width': 96, + Plans: [ + { + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Plan Width': 128, + 'Planned Partitions': 4, + Plans: [ + { + 'Node Type': 'Bitmap Heap Scan', + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Relation Name': 'documents_8', + Alias: 'collection', + 'Startup Cost': 20.85, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@*=) 'BSONHEX6d00000004636f6d6d69742e636f6c6c656374696f6e0055000000023000130000006170702e62736b792e666565642e706f737400023100150000006170702e62736b792e666565642e7265706f737400023200130000006170702e62736b792e666565642e6c696b65000000'::documentdb_core.bson))", + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Startup Cost': 0, + 'Plan Rows': 1, + 'Plan Width': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document 
OPERATOR(documentdb_api_catalog.@*=) 'BSONHEX6d00000004636f6d6d69742e636f6c6c656374696f6e0055000000023000130000006170702e62736b792e666565642e706f737400023100150000006170702e62736b792e666565642e7265706f737400023200130000006170702e62736b792e666565642e6c696b65000000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Index Scan', + 'Parallel Aware': false, + 'Total Cost': 0 + } + ], + 'Parallel Aware': false, + 'Total Cost': 441.96, + 'Plan Rows': 83417, + 'Plan Width': 32 + } + ], + 'Async Capable': false, + 'Startup Cost': 7089.25, + 'Total Cost': 9689.52, + 'Plan Rows': 83417, + 'Group Key': [ + "documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX7d000000105f69640000000000026576656e74001300000024636f6d6d69742e636f6c6c656374696f6e0003686f75725f6f665f64617900440000000324686f757200380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e80300000000000000'::documentdb_core.bson), 'BSONHEX3c000000030035000000026576656e740007000000246576656e740002686f75725f6f665f646179000d00000024686f75725f6f665f646179000000'::documentdb_core.bson, true)" + ], + 'Node Type': 'Aggregate', + Strategy: 'Hashed' + } + ], + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Startup Cost': 7089.25, + 'Total Cost': 10732.23 + } + ], + 'Startup Cost': 21827.76, + 'Total Cost': 22036.3, + 'Plan Rows': 83417, + 'Node Type': 'Sort', + 'Parallel Aware': false, + 'Async Capable': false, + 'Plan Width': 96 + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': { + '$in': [ + 'app.bsky.feed.post', + 'app.bsky.feed.repost', + 'app.bsky.feed.like' + ] + } + } + }, + { + '$project': { + _id: 0, + event: '$commit.collection', + hour_of_day: { + '$hour': { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + } + }, + { + '$group': { + _id: { + event: 
'$event', + hour_of_day: '$hour_of_day' + }, + count: { + '$sum': 1 + } + } + }, + { + '$sort': { + '_id.hour_of_day': 1, + '_id.event': 1 + } + } + ], + cursor: {}, + '$db': 'bluesky_1m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q4: + +{ + queryPlanner: { + Plan: { + Plans: [ + { + Plans: [ + { + 'Async Capable': false, + 'Startup Cost': 71986.64, + 'Total Cost': 76678.83, + 'Plan Rows': 125125, + 'Plan Width': 64, + Plans: [ + { + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Total Cost': 75740.39, + 'Plan Width': 128, + Plans: [ + { + 'Startup Cost': 71986.64, + 'Total Cost': 72299.45, + 'Plan Rows': 125125, + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Plan Width': 542, + 'Sort Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + Plans: [ + { + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 
'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + Plans: [ + { + 'Node Type': 'Bitmap Index Scan', + 'Async Capable': false, + 'Total Cost': 0, + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Startup Cost': 0, + 'Plan Rows': 1, + 'Plan Width': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))" + } + ], + 'Node Type': 'Bitmap Heap Scan', + Alias: 'collection', + 'Plan Rows': 125125, + 'Plan Width': 542, + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Relation Name': 'documents_8', + 'Startup Cost': 31.28, + 'Total Cost': 660.93 + } + ] + } + ], + 'Node Type': 'Aggregate', + 'Partial Mode': 'Simple', + 'Async Capable': false, + 'Startup Cost': 71986.64, + 'Plan Rows': 125125, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + Strategy: 'Sorted' + } + ], + 'Node Type': 'Subquery Scan', + Alias: 'agg_stage_2', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false + } + ], + 'Parallel Aware': false, + 'Total Cost': 78608.86, + 'Plan Rows': 
125125, + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Startup Cost': 78296.05, + 'Plan Width': 64, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX180000001066697273745f706f73745f7473000100000000'::documentdb_core.bson)) NULLS FIRST" + ] + } + ], + 'Node Type': 'Limit', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 78296.05, + 'Total Cost': 78296.05, + 'Plan Rows': 3, + 'Plan Width': 64 + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': 'app.bsky.feed.post' + } + }, + { + '$project': { + _id: 0, + user_id: '$did', + timestamp: { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + }, + { + '$group': { + _id: '$user_id', + first_post_ts: { + '$min': '$timestamp' + } + } + }, + { + '$sort': { + first_post_ts: 1 + } + }, + { + '$limit': 3 + } + ], + cursor: {}, + '$db': 'bluesky_1m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q5: + +{ + queryPlanner: { + Plan: { + 'Startup Cost': 80172.92, + 'Total Cost': 80172.93, + 'Plan Rows': 3, + 'Plan Width': 64, + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 80172.92, + 'Plan Rows': 125125, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_api_catalog.bson_dollar_project(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4, agg_stage_2.c5, agg_stage_2.c6), 
'BSONHEX790000000361637469766974795f7370616e006500000003246461746544696666005500000002737461727444617465000f000000246d696e5f74696d657374616d700002656e6444617465000f000000246d61785f74696d657374616d700002756e6974000c0000006d696c6c697365636f6e6400000000'::documentdb_core.bson), 'BSONHEX180000001061637469766974795f7370616e00ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + 'Node Type': 'Sort', + 'Total Cost': 80485.73, + 'Plan Width': 64, + Plans: [ + { + Alias: 'agg_stage_2', + 'Startup Cost': 71986.64, + 'Plan Rows': 125125, + 'Plan Width': 64, + Plans: [ + { + 'Plan Width': 192, + Strategy: 'Sorted', + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Startup Cost': 71986.64, + 'Plan Rows': 125125, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + Plans: [ + { + 'Async Capable': false, + 'Total Cost': 72299.45, + 'Plan Rows': 125125, + 'Sort Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', + 'Startup Cost': 71986.64, + 'Plan Width': 542, + Plans: [ + { + 'Relation Name': 'documents_8', + Alias: 'collection', + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 
'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Total Cost': 0, + 'Plan Rows': 1, + 'Plan Width': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Index Scan', + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Startup Cost': 0 + } + ], + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Startup Cost': 31.28, + 'Total Cost': 660.93, + 'Plan Rows': 125125, + 'Plan Width': 542, + 'Node Type': 'Bitmap Heap Scan', + 'Parallel Aware': false + } + ], + 'Parallel Aware': false + } + ], + 'Node Type': 'Aggregate', + 'Async Capable': false, + 'Total Cost': 76991.64 + } + ], + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Total Cost': 78555.7, + 'Parallel Aware': false, + 'Async Capable': false + } + ] + } + ], + 'Node Type': 'Limit', + 'Parallel Aware': false, + 'Async Capable': false + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': 'app.bsky.feed.post' + } + }, + { + '$project': { + _id: 0, 
+ user_id: '$did', + timestamp: { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + }, + { + '$group': { + _id: '$user_id', + min_timestamp: { + '$min': '$timestamp' + }, + max_timestamp: { + '$max': '$timestamp' + } + } + }, + { + '$project': { + activity_span: { + '$dateDiff': { + startDate: '$min_timestamp', + endDate: '$max_timestamp', + unit: 'millisecond' + } + } + } + }, + { + '$sort': { + activity_span: -1 + } + }, + { + '$limit': 3 + } + ], + cursor: {}, + '$db': 'bluesky_1m_snappy' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} diff --git a/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_1m_zstd.index_usage b/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_1m_zstd.index_usage new file mode 100644 index 0000000..c082a6a --- /dev/null +++ b/ferretdb/results/_index_usage/_m6i.8xlarge_bluesky_1m_zstd.index_usage @@ -0,0 +1,733 @@ +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q1: + +{ + queryPlanner: { + Plan: { + 'Startup Cost': 378669.44, + 'Plan Rows': 1000043, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_0.c1, agg_stage_0.c2, agg_stage_0.c3, agg_stage_0.c4), 'BSONHEX1000000010636f756e7400ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + Plans: [ + { + Alias: 'agg_stage_0', + 'Startup Cost': 167632.57, + 'Plan Rows': 1000043, + Plans: [ + { + 'Node Type': 'Aggregate', + Strategy: 'Hashed', + 'Partial Mode': 'Simple', + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 196305.68, + 'Plan Rows': 1000043, + 'Group Key': [ + "documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true)" + ], + 'Parent 
Relationship': 'Subquery', + 'Startup Cost': 167632.57, + 'Plan Width': 128, + 'Planned Partitions': 32, + Plans: [ + { + 'Relation Name': 'documents_10', + Alias: 'collection', + 'Total Cost': 87941.65, + 'Plan Rows': 1000043, + 'Node Type': 'Seq Scan', + 'Startup Cost': 0, + 'Plan Width': 32, + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false + } + ] + } + ], + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 203806, + 'Plan Width': 64 + } + ], + 'Node Type': 'Sort', + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 381169.55, + 'Plan Width': 64 + }, + JIT: { + Functions: 5, + Options: { + Optimization: false, + Expressions: true, + Deforming: true, + Inlining: false + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$group': { + _id: '$commit.collection', + count: { + '$sum': 1 + } + } + }, + { + '$sort': { + count: -1 + } + } + ], + cursor: {}, + '$db': 'bluesky_1m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q2: + +{ + queryPlanner: { + Plan: { + 'Plan Width': 64, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_api_catalog.bson_dollar_project(documentdb_core.bson_repath_and_build(agg_stage_1.c1, agg_stage_1.c2, agg_stage_1.c3, agg_stage_1.c4, agg_stage_1.c5, agg_stage_1.c6), 'BSONHEX3e000000026576656e740005000000245f69640010636f756e7400010000000375736572730017000000022473697a650007000000247573657273000000'::documentdb_core.bson), 'BSONHEX1000000010636f756e7400ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ], + Plans: [ + { + 'Node Type': 'Subquery Scan', + 
'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Plan Rows': 250011, + Plans: [ + { + 'Async Capable': false, + 'Total Cost': 152339.03, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true))" + ], + 'Node Type': 'Aggregate', + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Startup Cost': 144838.7, + 'Plan Rows': 250011, + 'Plan Width': 192, + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Startup Cost': 144838.7, + 'Plan Rows': 250011, + 'Plan Width': 543, + Plans: [ + { + 'Node Type': 'Bitmap Heap Scan', + 'Parent Relationship': 'Outer', + 'Async Capable': false, + Alias: 'collection', + 'Startup Cost': 62.5, + 'Total Cost': 1078.17, + 'Plan Rows': 250011, + Plans: [ + { + 'Parallel Aware': false, + 'Async Capable': false, + 'Plan Rows': 100, + 'Plan Width': 0, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Startup Cost': 0, + 'Total Cost': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Index Scan', + 'Parent Relationship': 'Outer' + } + ], + 'Parallel Aware': false, + 'Relation Name': 'documents_10', + 'Plan Width': 543, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson))" + } + ], + 'Node Type': 'Sort', + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 145463.73, + 'Sort Key': [ + 
"(documentdb_api_catalog.bson_expression_get(collection.document, 'BSONHEX1e00000002001300000024636f6d6d69742e636f6c6c656374696f6e0000'::documentdb_core.bson, true))" + ] + } + ], + Strategy: 'Sorted', + 'Parallel Aware': false + } + ], + 'Parallel Aware': false, + Alias: 'agg_stage_1', + 'Startup Cost': 144838.7, + 'Total Cost': 155464.17, + 'Plan Width': 64 + } + ], + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 187280.69, + 'Total Cost': 187905.72, + 'Node Type': 'Sort', + 'Plan Rows': 250011 + }, + JIT: { + Functions: 10, + Options: { + Inlining: false, + Optimization: false, + Expressions: true, + Deforming: true + } + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create' + } + }, + { + '$group': { + _id: '$commit.collection', + count: { + '$sum': 1 + }, + users: { + '$addToSet': '$did' + } + } + }, + { + '$project': { + event: '$_id', + count: 1, + users: { + '$size': '$users' + } + } + }, + { + '$sort': { + count: -1 + } + } + ], + cursor: {}, + '$db': 'bluesky_1m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q3: + +{ + queryPlanner: { + Plan: { + 'Startup Cost': 21806.85, + Plans: [ + { + 'Node Type': 'Subquery Scan', + 'Parallel Aware': false, + 'Total Cost': 10721.94, + 'Plan Width': 96, + 'Parent Relationship': 'Outer', + 'Async Capable': false, + Alias: 'agg_stage_2', + 'Startup Cost': 7082.46, + 'Plan Rows': 83337, + Plans: [ + { + 'Node Type': 'Aggregate', + Strategy: 'Hashed', + 'Async Capable': false, + 'Startup Cost': 7082.46, + 'Total Cost': 9680.23, + 'Plan Width': 128, + 'Group Key': [ + 
"documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX7d000000105f69640000000000026576656e74001300000024636f6d6d69742e636f6c6c656374696f6e0003686f75725f6f665f64617900440000000324686f757200380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e80300000000000000'::documentdb_core.bson), 'BSONHEX3c000000030035000000026576656e740007000000246576656e740002686f75725f6f665f646179000d00000024686f75725f6f665f646179000000'::documentdb_core.bson, true)" + ], + 'Planned Partitions': 4, + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Parallel Aware': false, + 'Plan Rows': 83337, + Plans: [ + { + 'Node Type': 'Bitmap Heap Scan', + 'Plan Width': 32, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@*=) 'BSONHEX6d00000004636f6d6d69742e636f6c6c656374696f6e0055000000023000130000006170702e62736b792e666565642e706f737400023100150000006170702e62736b792e666565642e7265706f737400023200130000006170702e62736b792e666565642e6c696b65000000'::documentdb_core.bson))", + Plans: [ + { + 'Parallel Aware': false, + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Startup Cost': 0, + 'Total Cost': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@*=) 
'BSONHEX6d00000004636f6d6d69742e636f6c6c656374696f6e0055000000023000130000006170702e62736b792e666565642e706f737400023100150000006170702e62736b792e666565642e7265706f737400023200130000006170702e62736b792e666565642e6c696b65000000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Index Scan', + 'Parent Relationship': 'Outer', + 'Plan Rows': 1, + 'Plan Width': 0 + } + ], + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Relation Name': 'documents_10', + Alias: 'collection', + 'Startup Cost': 20.83, + 'Total Cost': 441.54, + 'Plan Rows': 83337 + } + ] + } + ] + } + ], + 'Node Type': 'Sort', + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 22015.19, + 'Plan Rows': 83337, + 'Plan Width': 96, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX1a000000105f69642e686f75725f6f665f646179000100000000'::documentdb_core.bson)) NULLS FIRST", + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX14000000105f69642e6576656e74000100000000'::documentdb_core.bson)) NULLS FIRST" + ] + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': { + '$in': [ + 'app.bsky.feed.post', + 'app.bsky.feed.repost', + 'app.bsky.feed.like' + ] + } + } + }, + { + '$project': { + _id: 0, + event: '$commit.collection', + hour_of_day: { + '$hour': { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + } + }, + { + '$group': { + _id: { + event: '$event', + hour_of_day: '$hour_of_day' + }, + count: { + '$sum': 1 + } + } + }, + { + '$sort': { + '_id.hour_of_day': 1, + '_id.event': 1 + } + } + ], + cursor: {}, + '$db': 'bluesky_1m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', 
+ gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q4: + +{ + queryPlanner: { + Plan: { + 'Total Cost': 78222.35, + 'Plan Rows': 3, + 'Plan Width': 64, + Plans: [ + { + 'Async Capable': false, + 'Total Cost': 78534.85, + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Startup Cost': 78222.34, + 'Plan Rows': 125005, + 'Plan Width': 64, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4), 'BSONHEX180000001066697273745f706f73745f7473000100000000'::documentdb_core.bson)) NULLS FIRST" + ], + Plans: [ + { + 'Async Capable': false, + Alias: 'agg_stage_2', + 'Startup Cost': 71918.99, + 'Total Cost': 76606.67, + 'Plan Width': 64, + 'Node Type': 'Subquery Scan', + 'Parent Relationship': 'Outer', + 'Plan Rows': 125005, + Plans: [ + { + 'Plan Width': 128, + 'Parent Relationship': 'Subquery', + 'Async Capable': false, + 'Startup Cost': 71918.99, + 'Plan Rows': 125005, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + Plans: [ + { + 'Total Cost': 72231.5, + 'Sort Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 
'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + Plans: [ + { + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Total Cost': 0, + 'Plan Rows': 1, + 'Plan Width': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + 'Node Type': 'Bitmap Index Scan', + 'Parallel Aware': false, + 'Async Capable': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1', + 'Startup Cost': 0 + } + ], + 'Relation Name': 'documents_10', + Alias: 'collection', + 'Startup Cost': 31.25, + 'Plan Rows': 125005, + 'Node Type': 'Bitmap Heap Scan', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 660.3, + 'Plan Width': 543, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))" + } + ], + 'Node Type': 'Sort', + 'Parallel Aware': false, + 'Plan Rows': 125005, + 'Plan Width': 543, + 'Parent Relationship': 'Outer', + 'Async 
Capable': false, + 'Startup Cost': 71918.99 + } + ], + 'Node Type': 'Aggregate', + Strategy: 'Sorted', + 'Partial Mode': 'Simple', + 'Parallel Aware': false, + 'Total Cost': 75669.14 + } + ], + 'Parallel Aware': false + } + ] + } + ], + 'Node Type': 'Limit', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 78222.34 + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': 'app.bsky.feed.post' + } + }, + { + '$project': { + _id: 0, + user_id: '$did', + timestamp: { + '$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + }, + { + '$group': { + _id: '$user_id', + first_post_ts: { + '$min': '$timestamp' + } + } + }, + { + '$sort': { + first_post_ts: 1 + } + }, + { + '$limit': 3 + } + ], + cursor: {}, + '$db': 'bluesky_1m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} +------------------------------------------------------------------------------------------------------------------------ +Index usage for query Q5: + +{ + queryPlanner: { + Plan: { + Plans: [ + { + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Async Capable': false, + Alias: 'agg_stage_2', + 'Startup Cost': 71918.99, + 'Total Cost': 78481.75, + 'Node Type': 'Subquery Scan', + 'Parallel Aware': false, + 'Plan Rows': 125005, + 'Plan Width': 64, + Plans: [ + { + Strategy: 'Sorted', + 'Partial Mode': 'Simple', + 'Parent Relationship': 'Subquery', + 'Plan Width': 192, + 'Group Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 
'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + Plans: [ + { + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Plan Rows': 125005, + 'Sort Key': [ + "(documentdb_api_catalog.bson_expression_get(documentdb_api_catalog.bson_dollar_project(collection.document, 'BSONHEX63000000105f6964000000000002757365725f6964000500000024646964000374696d657374616d7000380000000324746f44617465002a0000000424646976696465001c000000023000090000002474696d655f757300103100e803000000000000'::documentdb_core.bson), 'BSONHEX1400000002000900000024757365725f69640000'::documentdb_core.bson, true))" + ], + 'Async Capable': false, + 'Startup Cost': 71918.99, + 'Total Cost': 72231.5, + 'Plan Width': 543, + Plans: [ + { + 'Total Cost': 660.3, + 'Node Type': 'Bitmap Heap Scan', + 'Async Capable': false, + Alias: 'collection', + 'Plan Rows': 125005, + 'Plan Width': 543, + 'Recheck Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + Plans: [ + { + 'Total Cost': 0, + 'Plan Rows': 1, + 'Plan Width': 0, + 'Node Type': 'Bitmap Index Scan', + 'Parent Relationship': 'Outer', + 'Async Capable': false, + 'Startup Cost': 0, + 'Index Cond': "((document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX16000000026b696e640007000000636f6d6d69740000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 
'BSONHEX2200000002636f6d6d69742e6f7065726174696f6e00070000006372656174650000'::documentdb_core.bson) AND (document OPERATOR(documentdb_api_catalog.@=) 'BSONHEX2f00000002636f6d6d69742e636f6c6c656374696f6e00130000006170702e62736b792e666565642e706f73740000'::documentdb_core.bson))", + 'Parallel Aware': false, + 'Index Name': 'kind_1_commit.operation_1_commit.collection_1_did_1_time_us_1' + } + ], + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Relation Name': 'documents_10', + 'Startup Cost': 31.25 + } + ], + 'Node Type': 'Sort' + } + ], + 'Node Type': 'Aggregate', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 71918.99, + 'Total Cost': 76919.19, + 'Plan Rows': 125005 + } + ] + } + ], + 'Startup Cost': 80097.41, + 'Plan Rows': 125005, + 'Plan Width': 64, + 'Node Type': 'Sort', + 'Parent Relationship': 'Outer', + 'Parallel Aware': false, + 'Async Capable': false, + 'Total Cost': 80409.93, + 'Sort Key': [ + "(documentdb_api_catalog.bson_orderby(documentdb_api_catalog.bson_dollar_project(documentdb_core.bson_repath_and_build(agg_stage_2.c1, agg_stage_2.c2, agg_stage_2.c3, agg_stage_2.c4, agg_stage_2.c5, agg_stage_2.c6), 'BSONHEX790000000361637469766974795f7370616e006500000003246461746544696666005500000002737461727444617465000f000000246d696e5f74696d657374616d700002656e6444617465000f000000246d61785f74696d657374616d700002756e6974000c0000006d696c6c697365636f6e6400000000'::documentdb_core.bson), 'BSONHEX180000001061637469766974795f7370616e00ffffffff00'::documentdb_core.bson)) DESC NULLS LAST" + ] + } + ], + 'Node Type': 'Limit', + 'Parallel Aware': false, + 'Async Capable': false, + 'Startup Cost': 80097.41, + 'Total Cost': 80097.42, + 'Plan Rows': 3, + 'Plan Width': 64 + } + }, + explainVersion: '1', + command: { + aggregate: 'bluesky', + pipeline: [ + { + '$match': { + kind: 'commit', + 'commit.operation': 'create', + 'commit.collection': 'app.bsky.feed.post' + } + }, + { + '$project': { + _id: 0, + user_id: '$did', + timestamp: { + 
'$toDate': { + '$divide': [ + '$time_us', + 1000 + ] + } + } + } + }, + { + '$group': { + _id: '$user_id', + min_timestamp: { + '$min': '$timestamp' + }, + max_timestamp: { + '$max': '$timestamp' + } + } + }, + { + '$project': { + activity_span: { + '$dateDiff': { + startDate: '$min_timestamp', + endDate: '$max_timestamp', + unit: 'millisecond' + } + } + } + }, + { + '$sort': { + activity_span: -1 + } + }, + { + '$limit': 3 + } + ], + cursor: {}, + '$db': 'bluesky_1m_zstd' + }, + serverInfo: { + host: 'ad6436bb4741', + port: 27017, + version: '7.0.77', + gitVersion: '2214721e51d64be04ad016f401d0abf8a335993e', + ferretdb: { + version: 'v2.0.0' + } + }, + ok: 1 +} diff --git a/ferretdb/results/m6i.8xlarge_bluesky_100m_snappy.json b/ferretdb/results/m6i.8xlarge_bluesky_100m_snappy.json new file mode 100644 index 0000000..4d4fd95 --- /dev/null +++ b/ferretdb/results/m6i.8xlarge_bluesky_100m_snappy.json @@ -0,0 +1,24 @@ +{ + "system": "FerretDB (snappy)", + "version": "2.0.0", + "os": "Ubuntu 24.04", + "date": "2025-03-26", + "machine": "m6i.8xlarge, 10000gib gp3", + "cluster_size": 1, + "comment": "", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 100000000, + "num_loaded_documents": 92964224, + "total_size": 73347252224, + "data_size": 56168030208, + "index_size": 17179222016, + "result": [ + [500.709,75.935,75.329], + [4142.17,237.019,237.258], + [2998.32,122.33,123.575], + [3002.11,136.762,135.919], + [null, null, null] + ] +} diff --git a/ferretdb/results/m6i.8xlarge_bluesky_100m_zstd.json b/ferretdb/results/m6i.8xlarge_bluesky_100m_zstd.json new file mode 100644 index 0000000..19033f8 --- /dev/null +++ b/ferretdb/results/m6i.8xlarge_bluesky_100m_zstd.json @@ -0,0 +1,24 @@ +{ + "system": "FerretDB (zstd)", + "version": "2.0.0", + "os": "Ubuntu 24.04", + "date": "2025-03-26", + "machine": "m6i.8xlarge, 10000gib gp3", + "cluster_size": 1, + "comment": "", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 100000000, + 
"num_loaded_documents": 91913464, + "total_size": 73348431872, + "data_size": 56168398848, + "index_size": 17180033024, + "result": [ + [627.897,75.598,75.506], + [4243.12,239.211,240.409], + [3068.22,125.121,124.925], + [3087.61,136.702,136.078], + [null, null, null] + ] +} diff --git a/ferretdb/results/m6i.8xlarge_bluesky_10m_snappy.json b/ferretdb/results/m6i.8xlarge_bluesky_10m_snappy.json new file mode 100644 index 0000000..7edd447 --- /dev/null +++ b/ferretdb/results/m6i.8xlarge_bluesky_10m_snappy.json @@ -0,0 +1,24 @@ +{ + "system": "FerretDB (snappy)", + "version": "2.0.0", + "os": "Ubuntu 24.04", + "date": "2025-03-26", + "machine": "m6i.8xlarge, 10000gib gp3", + "cluster_size": 1, + "comment": "", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 10000000, + "num_loaded_documents": 6865983, + "total_size": 6229532672, + "data_size": 4739489792, + "index_size": 1490042880, + "result": [ + [37.516,6.396,6.348], + [283.463,18.361,18.279], + [350.791,20.119,20.179], + [251.929,9.968,10.006], + [253.831,11.495,11.524] + ] +} diff --git a/ferretdb/results/m6i.8xlarge_bluesky_10m_zstd.json b/ferretdb/results/m6i.8xlarge_bluesky_10m_zstd.json new file mode 100644 index 0000000..62dcb21 --- /dev/null +++ b/ferretdb/results/m6i.8xlarge_bluesky_10m_zstd.json @@ -0,0 +1,24 @@ +{ + "system": "FerretDB (zstd)", + "version": "2.0.0", + "os": "Ubuntu 24.04", + "date": "2025-03-26", + "machine": "m6i.8xlarge, 10000gib gp3", + "cluster_size": 1, + "comment": "", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 10000000, + "num_loaded_documents": 6943443, + "total_size": 6229417984, + "data_size": 4739555328, + "index_size": 1489862656, + "result": [ + [47.254,6.443,6.393], + [279.628,18.415,18.402], + [351.67,20.299,20.212], + [252.707,10.042,10], + [249.176,11.413,11.399] + ] +} diff --git a/ferretdb/results/m6i.8xlarge_bluesky_1m_snappy.json b/ferretdb/results/m6i.8xlarge_bluesky_1m_snappy.json new file mode 100644 index 0000000..c2ece37 
--- /dev/null +++ b/ferretdb/results/m6i.8xlarge_bluesky_1m_snappy.json @@ -0,0 +1,24 @@ +{ + "system": "FerretDB (snappy)", + "version": "2.0.0", + "os": "Ubuntu 24.04", + "date": "2025-03-26", + "machine": "m6i.8xlarge, 10000gib gp3", + "cluster_size": 1, + "comment": "", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 1000000, + "num_loaded_documents": 953126, + "total_size": 794648576, + "data_size": 598032384, + "index_size": 196616192, + "result": [ + [3.655,0.868,0.871], + [6.475,3.028,2.937], + [33.898,2.34,2.304], + [32.485,0.946,0.942], + [32.785,1.149,1.16] + ] +} diff --git a/ferretdb/results/m6i.8xlarge_bluesky_1m_zstd.json b/ferretdb/results/m6i.8xlarge_bluesky_1m_zstd.json new file mode 100644 index 0000000..fbe655c --- /dev/null +++ b/ferretdb/results/m6i.8xlarge_bluesky_1m_zstd.json @@ -0,0 +1,24 @@ +{ + "system": "FerretDB (zstd)", + "version": "2.0.0", + "os": "Ubuntu 24.04", + "date": "2025-01-13", + "machine": "m6i.8xlarge, 10000gib gp3", + "cluster_size": 1, + "comment": "", + "retains_structure": "yes", + "tags": [ + ], + "dataset_size": 1000000, + "num_loaded_documents": 953126, + "total_size": 794689536, + "data_size": 598056960, + "index_size": 196632576, + "result": [ + [4.468,0.95,0.879], + [10.19,3.031,2.961], + [44.472,2.319,2.335], + [31.885,0.958,0.951], + [32.386,1.167,1.167] + ] +} From ce36cd25947d2925f7cfb4fd349bedbc33e402d4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 27 Mar 2025 08:03:32 +0000 Subject: [PATCH 14/14] Update comments --- ferretdb/index_usage.sh | 4 ++-- ferretdb/run_queries.sh | 5 ++--- mongodb/run_queries.sh | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/ferretdb/index_usage.sh b/ferretdb/index_usage.sh index 6ada891..dabe84a 100755 --- a/ferretdb/index_usage.sh +++ b/ferretdb/index_usage.sh @@ -1,6 +1,6 @@ #!/bin/bash -# If you change something in this file other than that, please also change it in mongodb/index_usage.sh as well +# If you change something in 
this file, please also change mongodb/index_usage.sh # Check if the required arguments are provided if [[ $# -lt 1 ]]; then @@ -44,4 +44,4 @@ cat "$QUERY_FILE" | while read -r query; do # Increment the query number QUERY_NUM=$((QUERY_NUM + 1)) -done; \ No newline at end of file +done; diff --git a/ferretdb/run_queries.sh b/ferretdb/run_queries.sh index 678c99d..e92a5f1 100755 --- a/ferretdb/run_queries.sh +++ b/ferretdb/run_queries.sh @@ -1,7 +1,6 @@ #!/bin/bash -# If you change something in this file other than that, please also change it in mongodb/run_queries.sh as well - +# If you change something in this file, please also change mongodb/run_queries.sh # Check if the required arguments are provided if [[ $# -lt 1 ]]; then @@ -57,4 +56,4 @@ cat "$QUERY_FILE" | while read -r query; do print('Execution time: ' + (end.getTime() - start.getTime()) + 'ms'); " done -done \ No newline at end of file +done diff --git a/mongodb/run_queries.sh b/mongodb/run_queries.sh index e2ac655..0f75aae 100755 --- a/mongodb/run_queries.sh +++ b/mongodb/run_queries.sh @@ -1,6 +1,6 @@ #!/bin/bash -# If you change something in this file, please also change it in ferretdb/run_queries.sh as well +# If you change something in this file, please also change ferretdb/run_queries.sh # Check if the required arguments are provided if [[ $# -lt 1 ]]; then @@ -79,4 +79,4 @@ cat "$QUERY_FILE" | while read -r query; do print('Execution time: ' + (end.getTime() - start.getTime()) + 'ms'); " done -done \ No newline at end of file +done