Better query plan viewing experience (#4285)
* stuff

* stuff including batch_upsert function

* stuff

* do things

* stuff

* different timestamps

* stuff

* Revert changes to comment.rs

* Update comment.rs

* Update comment.rs

* Update post_view.rs

* Update utils.rs

* Update up.sql

* Update up.sql

* Update down.sql

* Update up.sql

* Update main.rs

* use anyhow macro

* replace get(0) with first()

* as_slice

* Update series.rs

* Update db_perf.sh

* Update and rename crates/db_schema/src/utils/series.rs to crates/db_perf/src/series.rs

* Update utils.rs

* Update main.rs

* Update main.rs

* Update .woodpecker.yml

* fmt main.rs

* Update .woodpecker.yml

* Instance::delete at end

* Update main.rs

* Update Cargo.toml

---------

Co-authored-by: Nutomic <me@nutomic.com>
dullbananas and Nutomic committed Jan 24, 2024
1 parent 8670403 commit 759f6d8
Showing 15 changed files with 744 additions and 35 deletions.
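
For reviewers who want to try the new tool locally: the CI step added to .woodpecker.yml below runs it against an existing database server, and (per the comment in that step) scripts/db_perf.sh wraps the same invocation but creates a throwaway database server first. The minimal invocation, copied from the CI step:

    export LEMMY_CONFIG_LOCATION=config/config.hjson
    cargo run --package lemmy_db_perf -- --posts 10 --read-post-pages 1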
12 changes: 12 additions & 0 deletions .woodpecker.yml
@@ -135,6 +135,18 @@ steps:
       - diesel migration redo
     when: *slow_check_paths

+  check_db_perf_tool:
+    image: *rust_image
+    environment:
+      LEMMY_DATABASE_URL: postgres://lemmy:password@database:5432/lemmy
+      RUST_BACKTRACE: "1"
+      CARGO_HOME: .cargo_home
+    commands:
+      # same as scripts/db_perf.sh but without creating a new database server
+      - export LEMMY_CONFIG_LOCATION=config/config.hjson
+      - cargo run --package lemmy_db_perf -- --posts 10 --read-post-pages 1
+    when: *slow_check_paths
+
   cargo_clippy:
     image: *rust_image
     environment:
14 changes: 14 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

4 changes: 3 additions & 1 deletion Cargo.toml
@@ -54,6 +54,7 @@ members = [
   "crates/api_common",
   "crates/apub",
   "crates/utils",
+  "crates/db_perf",
   "crates/db_schema",
   "crates/db_views",
   "crates/db_views_actor",
@@ -156,6 +157,7 @@ tokio-postgres = "0.7.10"
 tokio-postgres-rustls = "0.10.0"
 enum-map = "2.7"
 moka = { version = "0.12.1", features = ["future"] }
+clap = { version = "4.4.11", features = ["derive"] }
 pretty_assertions = "1.4.0"

 [dependencies]
Expand Down Expand Up @@ -193,7 +195,7 @@ futures-util = { workspace = true }
chrono = { workspace = true }
prometheus = { version = "0.13.3", features = ["process"] }
serial_test = { workspace = true }
clap = { version = "4.4.11", features = ["derive"] }
clap = { workspace = true }
actix-web-prom = "0.7.0"

[dev-dependencies]
23 changes: 23 additions & 0 deletions crates/db_perf/Cargo.toml
@@ -0,0 +1,23 @@
[package]
name = "lemmy_db_perf"
version.workspace = true
edition.workspace = true
description.workspace = true
license.workspace = true
homepage.workspace = true
documentation.workspace = true
repository.workspace = true


[lints]
workspace = true

[dependencies]
anyhow = { workspace = true }
clap = { workspace = true }
diesel = { workspace = true }
diesel-async = { workspace = true }
lemmy_db_schema = { workspace = true }
lemmy_db_views = { workspace = true, features = ["full"] }
lemmy_utils = { workspace = true }
tokio = { workspace = true }
179 changes: 179 additions & 0 deletions crates/db_perf/src/main.rs
@@ -0,0 +1,179 @@
mod series;

use crate::series::ValuesFromSeries;
use anyhow::Context;
use clap::Parser;
use diesel::{
  dsl::{self, sql},
  sql_types,
  ExpressionMethods,
  IntoSql,
};
use diesel_async::{RunQueryDsl, SimpleAsyncConnection};
use lemmy_db_schema::{
  schema::post,
  source::{
    community::{Community, CommunityInsertForm},
    instance::Instance,
    person::{Person, PersonInsertForm},
  },
  traits::Crud,
  utils::{build_db_pool, get_conn, now},
  SortType,
};
use lemmy_db_views::{post_view::PostQuery, structs::PaginationCursor};
use lemmy_utils::error::{LemmyErrorExt2, LemmyResult};
use std::num::NonZeroU32;

#[derive(Parser, Debug)]
struct CmdArgs {
  #[arg(long, default_value_t = 3.try_into().unwrap())]
  communities: NonZeroU32,
  #[arg(long, default_value_t = 3.try_into().unwrap())]
  people: NonZeroU32,
  #[arg(long, default_value_t = 100000.try_into().unwrap())]
  posts: NonZeroU32,
  #[arg(long, default_value_t = 0)]
  read_post_pages: u32,
  #[arg(long)]
  explain_insertions: bool,
}
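// Note on the derive above: clap's `default_value_t` expects a value of the
// field's own type, which is why the numeric defaults go through
// `try_into().unwrap()` to build a `NonZeroU32` (safe to unwrap because the
// literals are non-zero).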

#[tokio::main]
async fn main() -> anyhow::Result<()> {
  let mut result = try_main().await.into_anyhow();
  if let Ok(path) = std::env::var("PGDATA") {
    result = result.with_context(|| {
      format!("Failed to run lemmy_db_perf (more details might be available in {path}/log)")
    });
  }
  result
}

async fn try_main() -> LemmyResult<()> {
  let args = CmdArgs::parse();
  let pool = &build_db_pool().await?;
  let pool = &mut pool.into();
  let conn = &mut get_conn(pool).await?;

  if args.explain_insertions {
    // log_nested_statements is enabled to log trigger execution
    conn
      .batch_execute(
        "SET auto_explain.log_min_duration = 0; SET auto_explain.log_nested_statements = on;",
      )
      .await?;
  }
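  // (These SETs take effect only if the server already loads the auto_explain
  // module, e.g. via shared_preload_libraries or session_preload_libraries.)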

  let instance = Instance::read_or_create(&mut conn.into(), "reddit.com".to_owned()).await?;

  println!("🫃 creating {} people", args.people);
  let mut person_ids = vec![];
  for i in 0..args.people.get() {
    let form = PersonInsertForm::builder()
      .name(format!("p{i}"))
      .public_key("pubkey".to_owned())
      .instance_id(instance.id)
      .build();
    person_ids.push(Person::create(&mut conn.into(), &form).await?.id);
  }

  println!("🌍 creating {} communities", args.communities);
  let mut community_ids = vec![];
  for i in 0..args.communities.get() {
    let form = CommunityInsertForm::builder()
      .name(format!("c{i}"))
      .title(i.to_string())
      .instance_id(instance.id)
      .build();
    community_ids.push(Community::create(&mut conn.into(), &form).await?.id);
  }

  let post_batches = args.people.get() * args.communities.get();
  let posts_per_batch = args.posts.get() / post_batches;
  let num_posts = post_batches * posts_per_batch;
  println!(
    "📜 creating {} posts ({} featured in community)",
    num_posts, post_batches
  );
  let mut num_inserted_posts = 0;
  // TODO: progress bar
  for person_id in &person_ids {
    for community_id in &community_ids {
      let n = dsl::insert_into(post::table)
        .values(ValuesFromSeries {
          start: 1,
          stop: posts_per_batch.into(),
          selection: (
            "AAAAAAAAAAA".into_sql::<sql_types::Text>(),
            person_id.into_sql::<sql_types::Integer>(),
            community_id.into_sql::<sql_types::Integer>(),
            series::current_value.eq(1),
            now()
              - sql::<sql_types::Interval>("make_interval(secs => ")
                .bind::<sql_types::BigInt, _>(series::current_value)
                .sql(")"),
          ),
        })
        .into_columns((
          post::name,
          post::creator_id,
          post::community_id,
          post::featured_community,
          post::published,
        ))
        .execute(conn)
        .await?;
      num_inserted_posts += n;
    }
  }
  // Make sure the println above shows the correct amount
  assert_eq!(num_inserted_posts, num_posts as usize);
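  // Rough shape of each batched statement above (a sketch; Diesel chooses the
  // actual bind numbering and identifier quoting):
  //   INSERT INTO post (name, creator_id, community_id, featured_community, published)
  //   SELECT $1, $2, $3, generate_series = $4,
  //          now() - make_interval(secs => generate_series)
  //   FROM generate_series($5, $6);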

  // Enable auto_explain
  conn
    .batch_execute(
      "SET auto_explain.log_min_duration = 0; SET auto_explain.log_nested_statements = off;",
    )
    .await?;

  // TODO: show execution duration stats
  let mut page_after = None;
  for page_num in 1..=args.read_post_pages {
    println!(
      "👀 getting page {page_num} of posts (pagination cursor used: {})",
      page_after.is_some()
    );

    // TODO: include local_user
    let post_views = PostQuery {
      community_id: community_ids.as_slice().first().cloned(),
      sort: Some(SortType::New),
      limit: Some(20),
      page_after,
      ..Default::default()
    }
    .list(&mut conn.into())
    .await?;

    if let Some(post_view) = post_views.into_iter().last() {
      println!("👀 getting pagination cursor data for next page");
      let cursor_data = PaginationCursor::after_post(&post_view)
        .read(&mut conn.into())
        .await?;
      page_after = Some(cursor_data);
    } else {
      println!("👀 reached empty page");
      break;
    }
  }
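  // The pagination cursor is re-read from the database on each iteration,
  // mirroring how a client steps through pages, so the logged plans should be
  // representative of real cursor-paginated reads.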

  // Delete everything, which might prevent problems if this is not run using scripts/db_perf.sh
  Instance::delete(&mut conn.into(), instance.id).await?;

  if let Ok(path) = std::env::var("PGDATA") {
    println!("🪵 query plans written in {path}/log");
  }

  Ok(())
}
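
Once a run finishes, the auto_explain output is plain text in the Postgres server log (the {path}/log directory printed above). A sketch for skimming the plans (hypothetical pattern and path, not part of the diff; auto_explain prefixes each logged plan with "plan:"):

    grep -A 30 'plan:' "$PGDATA"/log/*.log | less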
98 changes: 98 additions & 0 deletions crates/db_perf/src/series.rs
@@ -0,0 +1,98 @@
use diesel::{
  dsl,
  expression::{is_aggregate, ValidGrouping},
  pg::Pg,
  query_builder::{AsQuery, AstPass, QueryFragment},
  result::Error,
  sql_types,
  AppearsOnTable,
  Expression,
  Insertable,
  QueryId,
  SelectableExpression,
};

/// Generates a series of rows for insertion.
///
/// An inclusive range is created from `start` and `stop`. A row for each number is generated
/// using `selection`, which can be a tuple. [`current_value`] is an expression that gets the
/// current value.
///
/// For example, if there's a `numbers` table with a `number` column, this inserts all numbers
/// from 1 to 10 in a single statement:
///
/// ```
/// dsl::insert_into(numbers::table)
///   .values(ValuesFromSeries {
///     start: 1,
///     stop: 10,
///     selection: series::current_value,
///   })
///   .into_columns(numbers::number)
/// ```
#[derive(QueryId)]
pub struct ValuesFromSeries<S> {
  pub start: i64,
  pub stop: i64,
  pub selection: S,
}

impl<S: QueryFragment<Pg>> QueryFragment<Pg> for ValuesFromSeries<S> {
  fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> Result<(), Error> {
    self.selection.walk_ast(out.reborrow())?;
    out.push_sql(" FROM generate_series(");
    out.push_bind_param::<sql_types::BigInt, _>(&self.start)?;
    out.push_sql(", ");
    out.push_bind_param::<sql_types::BigInt, _>(&self.stop)?;
    out.push_sql(")");

    Ok(())
  }
}
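// For intuition: with `selection = series::current_value`, this fragment renders
// to roughly `"generate_series" FROM generate_series($1, $2)`; the Insertable
// impl below wraps it in `dsl::select`, producing the SELECT that stands in for
// a VALUES clause in an INSERT.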

impl<S: Expression> Expression for ValuesFromSeries<S> {
  type SqlType = S::SqlType;
}

impl<T, S: AppearsOnTable<current_value>> AppearsOnTable<T> for ValuesFromSeries<S> {}

impl<T, S: SelectableExpression<current_value>> SelectableExpression<T> for ValuesFromSeries<S> {}

impl<T, S: SelectableExpression<current_value>> Insertable<T> for ValuesFromSeries<S>
where
  dsl::BareSelect<Self>: AsQuery + Insertable<T>,
{
  type Values = <dsl::BareSelect<Self> as Insertable<T>>::Values;

  fn values(self) -> Self::Values {
    dsl::select(self).values()
  }
}

impl<S: ValidGrouping<(), IsAggregate = is_aggregate::No>> ValidGrouping<()>
  for ValuesFromSeries<S>
{
  type IsAggregate = is_aggregate::No;
}

#[allow(non_camel_case_types)]
#[derive(QueryId, Clone, Copy, Debug)]
pub struct current_value;

impl QueryFragment<Pg> for current_value {
  fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> Result<(), Error> {
    out.push_identifier("generate_series")?;

    Ok(())
  }
}
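// `push_identifier` emits the quoted identifier "generate_series", which is the
// implicit output column name when generate_series(...) is used in FROM.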

impl Expression for current_value {
  type SqlType = sql_types::BigInt;
}

impl AppearsOnTable<current_value> for current_value {}

impl SelectableExpression<current_value> for current_value {}

impl ValidGrouping<()> for current_value {
  type IsAggregate = is_aggregate::No;
}
1 change: 1 addition & 0 deletions crates/db_schema/Cargo.toml
@@ -76,6 +76,7 @@ tokio-postgres = { workspace = true, optional = true }
 tokio-postgres-rustls = { workspace = true, optional = true }
 rustls = { workspace = true, optional = true }
 uuid = { workspace = true, features = ["v4"] }
+anyhow = { workspace = true }

 [dev-dependencies]
 serial_test = { workspace = true }