diff --git a/Cargo.lock b/Cargo.lock index 7009dd4..e887f7b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -39,6 +39,18 @@ dependencies = [ "yansi", ] +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "autocfg" version = "1.5.0" @@ -57,6 +69,20 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +[[package]] +name = "blake3" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures", +] + [[package]] name = "bluejay-core" version = "0.3.1" @@ -67,13 +93,24 @@ dependencies = [ "strum", ] +[[package]] +name = "bluejay-operation-normalizer" +version = "0.3.1" +dependencies = [ + "blake3", + "bluejay-core", + "bluejay-parser", + "bumpalo", + "criterion 0.5.1", +] + [[package]] name = "bluejay-parser" version = "0.3.1" dependencies = [ "ariadne", "bluejay-core", - "criterion", + "criterion 0.7.0", "enum-as-inner", "insta", "itertools 0.14.0", @@ -100,7 +137,7 @@ dependencies = [ "bluejay-core", "bluejay-parser", "bluejay-printer", - "criterion", + "criterion 0.7.0", "strum", ] @@ -146,7 +183,7 @@ version = "0.3.1" dependencies = [ "bluejay-core", "bluejay-parser", - "criterion", + "criterion 0.7.0", "insta", "itertools 0.14.0", "once_cell", @@ -191,6 +228,16 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "cc" +version = "1.2.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -261,6 +308,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + [[package]] name = "convert_case" version = "0.10.0" @@ -270,6 +323,41 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot 0.5.0", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + [[package]] name = "criterion" version = "0.7.0" @@ -280,7 +368,7 @@ dependencies = [ "cast", "ciborium", "clap", - "criterion-plot", + "criterion-plot 0.6.0", "itertools 0.13.0", "num-traits", "oorandom", @@ -293,6 +381,16 @@ dependencies = [ "walkdir", ] +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + [[package]] name = 
"criterion-plot" version = "0.6.0" @@ -389,6 +487,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "fnv" version = "1.0.7" @@ -465,6 +569,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "id-arena" version = "2.3.0" @@ -497,6 +607,26 @@ dependencies = [ "walkdir", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -846,6 +976,12 @@ dependencies = [ "serde_core", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "similar" version = "2.7.0" diff --git a/Cargo.toml b/Cargo.toml index 2c9752b..76b5ec1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ members = [ "bluejay-parser", "bluejay-printer", "bluejay-schema-comparator", + 
"bluejay-operation-normalizer", "bluejay-typegen", "bluejay-typegen-codegen", "bluejay-typegen-macro", @@ -27,6 +28,7 @@ bluejay-typegen = { path = "./bluejay-typegen", version = "=0.3.1" } bluejay-typegen-codegen = { path = "./bluejay-typegen-codegen", version = "=0.3.1" } bluejay-typegen-macro = { path = "./bluejay-typegen-macro", version = "=0.3.1" } bluejay-validator = { path = "./bluejay-validator", version = "=0.3.1" } +bluejay-operation-normalizer = { path = "./bluejay-operation-normalizer", version = "=0.3.1" } bluejay-visibility = { path = "./bluejay-visibility", version = "=0.3.1" } [profile.shopify-function] diff --git a/README.md b/README.md index c659a04..6b94e03 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ ## Crates - [`bluejay-core`](/bluejay-core/README.md) +- [`bluejay-operation-normalizer`](/bluejay-operation-normalizer/README.md) - [`bluejay-parser`](/bluejay-parser/README.md) - [`bluejay-printer`](/bluejay-printer/README.md) - [`bluejay-schema-comparator`](/bluejay-schema-comparator/README.md) diff --git a/bluejay-core/src/operation_type.rs b/bluejay-core/src/operation_type.rs index af3a3a8..d8ac5bc 100644 --- a/bluejay-core/src/operation_type.rs +++ b/bluejay-core/src/operation_type.rs @@ -1,7 +1,7 @@ use crate::definition::DirectiveLocation; -use strum::{Display, EnumString, VariantNames}; +use strum::{AsRefStr, Display, EnumString, VariantNames}; -#[derive(Debug, Clone, Copy, PartialEq, EnumString, Display, VariantNames)] +#[derive(Debug, Clone, Copy, PartialEq, EnumString, AsRefStr, Display, VariantNames)] #[strum(serialize_all = "camelCase")] pub enum OperationType { Query, diff --git a/bluejay-operation-normalizer/Cargo.toml b/bluejay-operation-normalizer/Cargo.toml new file mode 100644 index 0000000..7269e01 --- /dev/null +++ b/bluejay-operation-normalizer/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "bluejay-operation-normalizer" +version.workspace = true +edition = "2021" +license = "MIT" +repository = 
"https://github.com/Shopify/bluejay" +homepage = "https://github.com/Shopify/bluejay" +keywords = ["graphql"] +description = "GraphQL operation normalization and signature hashing" + +[dependencies] +bluejay-core = { workspace = true } +blake3 = "1" +bumpalo = { version = "3", features = ["collections"] } + +[dev-dependencies] +bluejay-parser = { workspace = true } +criterion = "0.5" + +[[bench]] +name = "normalize" +harness = false + +[lints] +workspace = true diff --git a/bluejay-operation-normalizer/README.md b/bluejay-operation-normalizer/README.md new file mode 100644 index 0000000..5c3204b --- /dev/null +++ b/bluejay-operation-normalizer/README.md @@ -0,0 +1,3 @@ +# `bluejay-operation-normalizer` + +`bluejay-operation-normalizer` provides normalization and BLAKE3-based signature generation for GraphQL operations, producing canonical representations that group operations by structure rather than syntax. diff --git a/bluejay-operation-normalizer/benches/normalize.rs b/bluejay-operation-normalizer/benches/normalize.rs new file mode 100644 index 0000000..5a8a2aa --- /dev/null +++ b/bluejay-operation-normalizer/benches/normalize.rs @@ -0,0 +1,326 @@ +use bluejay_parser::ast::{executable::ExecutableDocument, Parse}; +use criterion::{criterion_group, criterion_main, Criterion}; + +fn parse(input: &str) -> ExecutableDocument { + ExecutableDocument::parse(input) + .result + .expect("parse error") +} + +fn bench_small(c: &mut Criterion) { + let doc = parse("query { user { name email } }"); + c.bench_function("normalize_small", |b| { + b.iter(|| bluejay_operation_normalizer::normalize(&doc, None).unwrap()) + }); + c.bench_function("signature_small", |b| { + b.iter(|| bluejay_operation_normalizer::signature(&doc, None).unwrap()) + }); +} + +fn bench_medium(c: &mut Criterion) { + let doc = parse( + r#" + query GetUser($id: ID!, $first: Int = 10, $after: String) { + user(id: $id) { + name + email + avatar + role + posts(first: $first, after: $after, orderBy: "created_at") 
{ + edges { + cursor + node { + title + body + createdAt + tags + } + } + pageInfo { + hasNextPage + endCursor + } + } + } + } + "#, + ); + c.bench_function("normalize_medium", |b| { + b.iter(|| bluejay_operation_normalizer::normalize(&doc, Some("GetUser")).unwrap()) + }); + c.bench_function("signature_medium", |b| { + b.iter(|| bluejay_operation_normalizer::signature(&doc, Some("GetUser")).unwrap()) + }); +} + +fn bench_complex(c: &mut Criterion) { + let doc = parse( + r#" + query ComplexQuery($userId: ID!, $includeEmail: Boolean = true, $limit: Int = 20, $offset: Int = 0) @cacheControl(maxAge: 300) { + user(id: $userId) { + id + ...UserBasic + ...UserPosts + followers(limit: $limit, offset: $offset) { + ...UserBasic + mutualFriends { + ...UserBasic + } + } + } + systemStatus { + healthy + version + uptime + } + } + + fragment UserBasic on User { + id + name + email @include(if: $includeEmail) + avatar + role + createdAt + } + + fragment UserPosts on User { + posts(first: 10) { + edges { + cursor + node { + ...PostDetails + } + } + pageInfo { + hasNextPage + hasPreviousPage + startCursor + endCursor + } + totalCount + } + } + + fragment PostDetails on Post { + id + title + body + createdAt + updatedAt + author { + ...UserBasic + } + comments(first: 5) { + edges { + node { + id + body + author { + name + } + } + } + } + tags + likes + } + "#, + ); + c.bench_function("normalize_complex", |b| { + b.iter(|| bluejay_operation_normalizer::normalize(&doc, Some("ComplexQuery")).unwrap()) + }); + c.bench_function("signature_complex", |b| { + b.iter(|| bluejay_operation_normalizer::signature(&doc, Some("ComplexQuery")).unwrap()) + }); +} + +/// Simulates a Relay/Apollo Client app where each component defines a small +/// fragment and the page query composes them. 10 fragments, transitive deps, +/// plus an unused fragment that should be stripped. 
+fn bench_fragment_colocation(c: &mut Criterion) { + let doc = parse( + r#" + query ProductPage($handle: String!, $first: Int = 10, $after: String) { + product(handle: $handle) { + ...ProductHeader + ...ProductPricing + ...ProductMedia + ...ProductVariants + ...ProductMetafields + ...ProductSeo + } + shop { + ...ShopInfo + } + cart { + ...CartSummary + } + } + + fragment ProductHeader on Product { + id + title + handle + description + vendor + productType + tags + createdAt + updatedAt + } + + fragment ProductPricing on Product { + priceRange { + ...MoneyRange + } + compareAtPriceRange { + ...MoneyRange + } + } + + fragment MoneyRange on PriceRange { + minVariantPrice { ...MoneyFields } + maxVariantPrice { ...MoneyFields } + } + + fragment MoneyFields on Money { + amount + currencyCode + } + + fragment ProductMedia on Product { + images(first: 10) { + edges { + node { + id + url + altText + width + height + } + } + } + } + + fragment ProductVariants on Product { + variants(first: $first, after: $after) { + edges { + cursor + node { + id + title + sku + availableForSale + price { ...MoneyFields } + compareAtPrice { ...MoneyFields } + selectedOptions { + name + value + } + } + } + pageInfo { + hasNextPage + endCursor + } + } + } + + fragment ProductMetafields on Product { + metafield1: metafield(namespace: "custom", key: "care_instructions") { value type } + metafield2: metafield(namespace: "custom", key: "material") { value type } + metafield3: metafield(namespace: "custom", key: "sizing_guide") { value type } + } + + fragment ProductSeo on Product { + seo { + title + description + } + } + + fragment ShopInfo on Shop { + name + primaryDomain { url } + shipsToCountries + } + + fragment CartSummary on Cart { + id + totalQuantity + estimatedCost { + totalAmount { ...MoneyFields } + subtotalAmount { ...MoneyFields } + totalTaxAmount { ...MoneyFields } + } + } + + fragment UnusedAnalytics on Product { + id + title + vendor + } + "#, + ); + 
c.bench_function("normalize_fragment_colocation", |b| { + b.iter(|| bluejay_operation_normalizer::normalize(&doc, Some("ProductPage")).unwrap()) + }); + c.bench_function("signature_fragment_colocation", |b| { + b.iter(|| bluejay_operation_normalizer::signature(&doc, Some("ProductPage")).unwrap()) + }); +} + +/// 30 fields in reverse alphabetical order at root level. Worst case for sort. +fn bench_wide_reverse_sorted(c: &mut Criterion) { + let doc = parse( + r#" + query DashboardQuery { + zones { id } + yields { id } + xrefs { id } + webhooks(topic: "orders/create", format: "json") { id } + variants(first: 50, after: "cursor123", sortKey: TITLE, reverse: true) { id } + users { id } + transactions(first: 10, after: "txn_cursor", query: "status:success AND amount:>100") { id } + subscriptions { id } + returns { id } + quotas { id } + products(first: 25, after: "prod_cursor", query: "status:active", sortKey: BEST_SELLING, reverse: false) { id } + payments { id } + orders(first: 50, after: "order_cursor", query: "financial_status:paid", sortKey: CREATED_AT, reverse: true) { id } + notifications { id } + metafields(namespace: "custom", first: 10) { id } + locations { id } + inventoryLevels { id } + images(first: 20, sortKey: POSITION, reverse: false) { id } + hooks { id } + giftCards { id } + fulfillments(first: 10, after: "ful_cursor") { id } + events(first: 50, after: "evt_cursor", query: "verb:sale", sortKey: CREATED_AT, reverse: true) { id } + discounts(first: 25, query: "status:active", sortKey: CREATED_AT) { id } + customers(first: 100, after: "cust_cursor", query: "country:CA AND orders_count:>5", sortKey: LAST_ORDER_DATE, reverse: true) { id } + collections { id } + blogs { id } + articles { id } + analytics(shopId: "1", startDate: "2024-01-01", endDate: "2024-12-31", granularity: "daily", metrics: ["views", "clicks"], dimensions: ["source", "medium"], filters: {status: "active"}, sortBy: "views", sortOrder: "desc", limit: 100, offset: 0, timezone: "UTC") { id } 
+ } + "#, + ); + c.bench_function("normalize_wide_reverse", |b| { + b.iter(|| bluejay_operation_normalizer::normalize(&doc, Some("DashboardQuery")).unwrap()) + }); + c.bench_function("signature_wide_reverse", |b| { + b.iter(|| bluejay_operation_normalizer::signature(&doc, Some("DashboardQuery")).unwrap()) + }); +} + +criterion_group!( + benches, + bench_small, + bench_medium, + bench_complex, + bench_fragment_colocation, + bench_wide_reverse_sorted, +); +criterion_main!(benches); diff --git a/bluejay-operation-normalizer/src/build.rs b/bluejay-operation-normalizer/src/build.rs new file mode 100644 index 0000000..b1d9c61 --- /dev/null +++ b/bluejay-operation-normalizer/src/build.rs @@ -0,0 +1,253 @@ +//! Builds the normalized IR from a parsed AST in a single recursive pass. +//! +//! Implements algorithm steps 2a–2e: for each selection set, this module builds +//! normalized fields and inline fragments, expands fragment spreads, flattens +//! bare inline fragments, merges matching inline fragments, and sorts — all +//! bottom-up so each level is fully normalized before being returned to the parent. + +use bluejay_core::executable::{ + ExecutableDocument, Field, FragmentDefinition, FragmentSpread, InlineFragment, Selection, + SelectionReference, +}; +use bluejay_core::{Argument, AsIter, Directive}; +use bumpalo::collections::Vec as BVec; +use bumpalo::Bump; +use std::cmp::Ordering; + +use crate::ir::{ + NormalizedDirective, NormalizedField, NormalizedInlineFragment, NormalizedSelection, +}; + +/// Build and normalize a selection set in a single recursive pass (steps 2a–2e). +/// +/// For each selection in the set: +/// - **Fields** (step 2a): collect name (alias dropped), sorted args, sorted directives, +/// and recursively build child selections. +/// - **Fragment spreads** (step 2b): expand to inline fragments with the fragment's type +/// condition, merging directives from both spread and definition. 
+/// - **Inline fragments** (step 2c): flatten bare ones (no type condition, no directives) +/// into the parent; keep others as-is. +/// +/// After collecting all selections, merge and sort (steps 2d–2e) via [`normalize_in_place`]. +pub(crate) fn build_selections<'a, 'bump, E: ExecutableDocument + 'a>( + selection_set: &'a E::SelectionSet, + fragment_defs: &[(&'a str, &'a E::FragmentDefinition)], + expanding: &mut Vec<&'a str>, + bump: &'bump Bump, +) -> BVec<'bump, NormalizedSelection<'a, 'bump>> { + let mut result = BVec::with_capacity_in(selection_set.len(), bump); + + for selection in selection_set.iter() { + match selection.as_ref() { + // Step 2a: fields + SelectionReference::Field(field) => { + result.push(NormalizedSelection::Field(build_field::( + field, + fragment_defs, + expanding, + bump, + ))); + } + // Step 2b: expand fragment spreads into inline fragments + SelectionReference::FragmentSpread(spread) => { + let name = spread.name(); + // Cycle detection: skip if this fragment is already being expanded + if expanding.contains(&name) { + continue; + } + if let Some((_, frag_def)) = fragment_defs.iter().find(|(n, _)| *n == name) { + expanding.push(name); + + let mut directives = build_directives::(spread.directives(), bump); + directives.extend(build_directives::(frag_def.directives(), bump)); + directives.sort_unstable(); + + let selections = build_selections::( + frag_def.selection_set(), + fragment_defs, + expanding, + bump, + ); + + expanding.pop(); + + result.push(NormalizedSelection::InlineFragment( + NormalizedInlineFragment { + type_condition: Some(frag_def.type_condition()), + directives, + selections, + }, + )); + } + } + // Step 2c: inline fragments — flatten bare ones, keep others + SelectionReference::InlineFragment(inline) => { + let directives = build_directives::(inline.directives(), bump); + let selections = + build_selections::(inline.selection_set(), fragment_defs, expanding, bump); + + // Flatten bare inline fragments (no type 
condition, no directives) + if inline.type_condition().is_none() && directives.is_empty() { + result.extend(selections); + } else { + result.push(NormalizedSelection::InlineFragment( + NormalizedInlineFragment { + type_condition: inline.type_condition(), + directives, + selections, + }, + )); + } + } + } + } + + // Merge inline fragments with same (type_condition, directives) and sort + normalize_in_place(&mut result); + + result +} + +/// Steps 2d–2e: merge inline fragments with matching `(type_condition, directives)`, +/// then sort all selections (fields first by name, then inline fragments by type +/// condition and directives). +fn normalize_in_place(selections: &mut BVec<'_, NormalizedSelection<'_, '_>>) { + let mut if_count = 0u32; + for s in selections.iter() { + if matches!(s, NormalizedSelection::InlineFragment(_)) { + if_count += 1; + if if_count >= 2 { + break; + } + } + } + + if if_count >= 2 { + let mut i = 0; + while i < selections.len() { + if let NormalizedSelection::InlineFragment(_) = &selections[i] { + let mut j = i + 1; + let mut merged = false; + while j < selections.len() { + let should_merge = match (&selections[i], &selections[j]) { + ( + NormalizedSelection::InlineFragment(a), + NormalizedSelection::InlineFragment(b), + ) => a.type_condition == b.type_condition && a.directives == b.directives, + _ => false, + }; + if should_merge { + let removed = selections.swap_remove(j); + let NormalizedSelection::InlineFragment(inf) = removed else { + unreachable!("should_merge guarantees InlineFragment"); + }; + let NormalizedSelection::InlineFragment(ref mut target) = selections[i] + else { + unreachable!("should_merge guarantees InlineFragment"); + }; + target.selections.extend(inf.selections); + merged = true; + } else { + j += 1; + } + } + // Only re-sort if we actually merged something + if merged { + let NormalizedSelection::InlineFragment(ref mut target) = selections[i] else { + unreachable!("outer if let guarantees InlineFragment"); + }; + 
target + .selections + .sort_unstable_by(|a, b| cmp_selections(a, b)); + } + } + i += 1; + } + } + + selections.sort_unstable_by(|a, b| cmp_selections(a, b)); +} + +/// Sort order for step 2e: fields first (alphabetically by name), then inline +/// fragments (by type condition, then by directives). +fn cmp_selections(a: &NormalizedSelection<'_, '_>, b: &NormalizedSelection<'_, '_>) -> Ordering { + match (a, b) { + (NormalizedSelection::Field(af), NormalizedSelection::Field(bf)) => af + .name + .cmp(bf.name) + .then_with(|| af.arg_names.as_slice().cmp(bf.arg_names.as_slice())) + .then_with(|| af.directives.cmp(&bf.directives)), + (NormalizedSelection::Field(_), NormalizedSelection::InlineFragment(_)) => Ordering::Less, + (NormalizedSelection::InlineFragment(_), NormalizedSelection::Field(_)) => { + Ordering::Greater + } + (NormalizedSelection::InlineFragment(ai), NormalizedSelection::InlineFragment(bi)) => ai + .type_condition + .cmp(&bi.type_condition) + .then_with(|| ai.directives.cmp(&bi.directives)), + } +} + +/// Step 2a: build a normalized field — alias dropped, args/directives sorted, +/// child selections recursively normalized. +fn build_field<'a, 'bump, E: ExecutableDocument + 'a>( + field: &'a E::Field, + fragment_defs: &[(&'a str, &'a E::FragmentDefinition)], + expanding: &mut Vec<&'a str>, + bump: &'bump Bump, +) -> NormalizedField<'a, 'bump> { + let arg_names = build_arg_names::(field.arguments(), bump); + let directives = build_directives::(field.directives(), bump); + let selections = match field.selection_set() { + Some(ss) => build_selections::(ss, fragment_defs, expanding, bump), + None => BVec::new_in(bump), + }; + + NormalizedField { + name: field.name(), + arg_names, + directives, + selections, + } +} + +/// Collect and sort argument names alphabetically. Values are erased during +/// serialization (step 3) — only names matter for the canonical form. 
+fn build_arg_names<'a, 'bump, const CONST: bool, E: ExecutableDocument + 'a>( + args: Option<&'a E::Arguments>, + bump: &'bump Bump, +) -> BVec<'bump, &'a str> { + let Some(args) = args else { + return BVec::new_in(bump); + }; + let mut names: BVec<'bump, &'a str> = BVec::from_iter_in(args.iter().map(|a| a.name()), bump); + names.sort_unstable(); + names +} + +/// Collect and sort directives by name (then by argument names). Each directive's +/// argument names are also sorted. Used for fields, inline fragments, and operations. +pub(crate) fn build_directives<'a, 'bump, const CONST: bool, E: ExecutableDocument + 'a>( + directives: Option<&'a E::Directives>, + bump: &'bump Bump, +) -> BVec<'bump, NormalizedDirective<'a, 'bump>> { + let Some(directives) = directives else { + return BVec::new_in(bump); + }; + let mut result: BVec<'bump, _> = BVec::from_iter_in( + directives.iter().map(|d| NormalizedDirective { + name: d.name(), + arg_names: { + let mut names: BVec<'bump, &str> = match d.arguments() { + Some(args) => BVec::from_iter_in(args.iter().map(|a| a.name()), bump), + None => BVec::new_in(bump), + }; + names.sort_unstable(); + names + }, + }), + bump, + ); + result.sort_unstable(); + result +} diff --git a/bluejay-operation-normalizer/src/ir.rs b/bluejay-operation-normalizer/src/ir.rs new file mode 100644 index 0000000..90cfaba --- /dev/null +++ b/bluejay-operation-normalizer/src/ir.rs @@ -0,0 +1,67 @@ +//! Normalized IR types used between the build and serialize phases. +//! +//! These represent the output of algorithm step 2: a tree of fields and inline fragments +//! with aliases removed, fragments expanded, values erased, and everything sorted. +//! All collections use bump-allocated vectors ([`BVec`]) for arena allocation. + +use bumpalo::collections::Vec as BVec; + +/// A directive with its name and sorted argument names (step 2a). +/// Argument values are omitted during serialization. 
+#[derive(Clone, Debug)] +pub(crate) struct NormalizedDirective<'a, 'bump> { + pub name: &'a str, + pub arg_names: BVec<'bump, &'a str>, +} + +impl PartialEq for NormalizedDirective<'_, '_> { + fn eq(&self, other: &Self) -> bool { + self.name == other.name && self.arg_names == other.arg_names + } +} + +impl Eq for NormalizedDirective<'_, '_> {} + +impl PartialOrd for NormalizedDirective<'_, '_> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for NormalizedDirective<'_, '_> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.name + .cmp(other.name) + .then_with(|| self.arg_names.as_slice().cmp(other.arg_names.as_slice())) + } +} + +/// A field with alias removed, arguments and directives sorted, and child +/// selections recursively normalized (step 2a). +pub(crate) struct NormalizedField<'a, 'bump> { + /// The underlying field name (alias is dropped). + pub name: &'a str, + /// Argument names sorted alphabetically. Values are erased. + pub arg_names: BVec<'bump, &'a str>, + /// Directives sorted by name, then by argument names. + pub directives: BVec<'bump, NormalizedDirective<'a, 'bump>>, + /// Recursively normalized child selections. + pub selections: BVec<'bump, NormalizedSelection<'a, 'bump>>, +} + +/// An inline fragment produced by expanding a named fragment spread (step 2b) +/// or kept from an existing inline fragment (step 2c). Inline fragments with +/// matching `(type_condition, directives)` are merged (step 2d). +pub(crate) struct NormalizedInlineFragment<'a, 'bump> { + pub type_condition: Option<&'a str>, + pub directives: BVec<'bump, NormalizedDirective<'a, 'bump>>, + pub selections: BVec<'bump, NormalizedSelection<'a, 'bump>>, +} + +/// A normalized selection: either a field or an inline fragment. +/// Sorted with fields first (by name), then inline fragments (by type condition, +/// then directives) — see step 2e. 
+pub(crate) enum NormalizedSelection<'a, 'bump> { + Field(NormalizedField<'a, 'bump>), + InlineFragment(NormalizedInlineFragment<'a, 'bump>), +} diff --git a/bluejay-operation-normalizer/src/lib.rs b/bluejay-operation-normalizer/src/lib.rs new file mode 100644 index 0000000..03b64ba --- /dev/null +++ b/bluejay-operation-normalizer/src/lib.rs @@ -0,0 +1,621 @@ +//! # GraphQL Operation Normalizer +//! +//! Produces a **canonical string representation** of a GraphQL operation, suitable for +//! generating stable signatures (hashes) that group operations by structure rather than +//! syntax. Two operations that differ only in whitespace, field ordering, alias names, +//! fragment style, argument values, or variable definitions will produce the same +//! signature. +//! +//! The normalizer eliminates the most common sources of trivial divergence while keeping +//! the implementation simple. It does **not** attempt full semantic equivalence — for +//! example, duplicate fields are not merged, and inline fragments are only merged at the +//! same nesting level, not across parent-child boundaries. +//! +//! ## Normalization Algorithm +//! +//! Given a parsed `ExecutableDocument` and an (optional) operation name: +//! +//! 1. **Resolve the operation** — find the target operation definition by name, or use the +//! sole operation if unnamed. Error if ambiguous or missing. +//! +//! 2. **Build a normalized IR** from the operation's selection set, recursively processing +//! each selection. This is a single bottom-up pass that builds and normalizes each level +//! before returning it to the parent: +//! +//! a. **Fields** — collect the field name (dropping any alias), sorted argument names, +//! sorted directives, and recursively normalized child selections. +//! ```graphql +//! # input +//! { myAlias: field(z: 1, a: 2) } +//! # normalized +//! query{field(a:,z:)} +//! ``` +//! +//! b. **Fragment spreads** — expand inline: replace `...FragName` with +//! `... 
on { }`, merging directives from both the spread +//! and the fragment definition. Unused fragments are naturally excluded. Cycles are +//! detected via a stack of currently-expanding fragment names. +//! ```graphql +//! # input +//! { ...F } +//! fragment F on User { name } +//! # normalized — spread replaced with inline fragment, named fragment dropped +//! query{...on User{name}} +//! ``` +//! +//! c. **Inline fragments** — if bare (no type condition, no directives), flatten their +//! children directly into the parent selection set. Otherwise, keep as-is. +//! ```graphql +//! # input — bare inline fragment (no type condition, no directives) +//! { ... { name email } } +//! # normalized — children flattened into parent +//! query{email name} +//! ``` +//! +//! d. **Merge inline fragments** — at each level, merge inline fragments that share the +//! same `(type_condition, directives)` pair into a single inline fragment, combining +//! their child selections. +//! ```graphql +//! # input — two inline fragments on same type +//! { ... on User { name } ... on User { email } } +//! # normalized — merged into one +//! query{...on User{email name}} +//! ``` +//! +//! e. **Sort selections** — fields first (alphabetically by field name), then inline +//! fragments (by type condition, then by directives). +//! ```graphql +//! # input +//! { z a ... on User { b } m } +//! # normalized — fields sorted first, then inline fragments +//! query{a m z ...on User{b}} +//! ``` +//! +//! 3. **Serialize** the normalized IR to a compact canonical string: +//! - Operation type keyword (`query`, `mutation`, `subscription`) with no name. +//! - Variable definitions are dropped entirely. +//! - Argument and directive values are omitted (only `name:` is kept). +//! - No whitespace except single spaces separating selections within `{ }`. +//! - Argument names are sorted alphabetically within each argument list. +//! - Directive names are sorted alphabetically. +//! ```graphql +//! 
# input +//! query MyQuery($id: ID!) @z @a { +//! user(id: $id) { name email } +//! } +//! # normalized — name dropped, vars dropped, directives sorted, values omitted +//! query@a@z{user(id:){email name}} +//! ``` +//! +//! 4. **Signature** — optionally hash the canonical string with BLAKE3 to produce a +//! stable hex digest. +//! +//! ## Module Structure +//! +//! - [`ir`] — Normalized IR types. (Step 2 data structures) +//! - [`build`] — Builds normalized IR from the parsed AST in a single recursive pass. +//! (Steps 2a–2e) +//! - [`normalize`] — Entry point that orchestrates resolution, building, and serialization. +//! (Steps 1–3) +//! - [`serialize`] — Writes the normalized IR to a canonical string. (Step 3) + +mod build; +mod ir; +mod normalize; +mod serialize; + +use bluejay_core::executable::ExecutableDocument; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SignatureError { + OperationNotFound(String), + AmbiguousOperation, + NoOperations, +} + +impl std::fmt::Display for SignatureError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::OperationNotFound(name) => write!(f, "operation not found: {name}"), + Self::AmbiguousOperation => { + write!(f, "multiple operations found; specify operation name") + } + Self::NoOperations => write!(f, "no operations in document"), + } + } +} + +impl std::error::Error for SignatureError {} + +pub fn normalize( + doc: &E, + op_name: Option<&str>, +) -> Result { + normalize::normalize_doc::(doc, op_name) +} + +pub fn signature( + doc: &E, + op_name: Option<&str>, +) -> Result { + let normalized = normalize::(doc, op_name)?; + Ok(blake3::hash(normalized.as_bytes()).to_hex().to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + use bluejay_parser::ast::{executable::ExecutableDocument as ParserDoc, Parse}; + + fn parse(input: &str) -> ParserDoc { + ParserDoc::parse(input).result.expect("parse error") + } + + // === Basic field sorting === + + #[test] + fn 
fields_sorted() { + let doc = parse("{ z a m }"); + assert_eq!(normalize(&doc, None).unwrap(), "query{a m z}"); + } + + #[test] + fn nested_selection_sorting() { + let doc = parse("{ parent { z a m } }"); + assert_eq!(normalize(&doc, None).unwrap(), "query{parent{a m z}}"); + } + + #[test] + fn duplicate_fields_preserved() { + let doc = parse("{ a a a }"); + assert_eq!(normalize(&doc, None).unwrap(), "query{a a a}"); + } + + // === Argument handling (values omitted) === + + #[test] + fn arguments_sorted_and_values_replaced() { + let doc = parse("{ field(z: 1, a: 2, m: 3) }"); + assert_eq!(normalize(&doc, None).unwrap(), "query{field(a:,m:,z:)}"); + } + + #[test] + fn all_value_types_replaced() { + let doc = parse( + r#"{ field(a: 42, b: 3.14, c: "hello", d: true, e: false, f: null, g: ENUM, h: [1,2], i: {x: 1}, j: $var) }"#, + ); + assert_eq!( + normalize(&doc, None).unwrap(), + "query{field(a:,b:,c:,d:,e:,f:,g:,h:,i:,j:)}" + ); + } + + // === Variable definitions and operation names dropped === + + #[test] + fn variable_definitions_dropped() { + let doc = parse("query($z: String, $a: Int, $m: Boolean) { field }"); + assert_eq!(normalize(&doc, None).unwrap(), "query{field}"); + } + + #[test] + fn operation_name_dropped() { + let doc = parse("query MyQuery { field }"); + assert_eq!(normalize(&doc, None).unwrap(), "query{field}"); + } + + // === Aliases removed === + + #[test] + fn alias_removed() { + let doc = parse("{ myAlias: someField }"); + assert_eq!(normalize(&doc, None).unwrap(), "query{someField}"); + } + + #[test] + fn aliased_fields_sorted_by_field_name() { + // After alias removal, sort by the actual field name + let doc = parse("{ z: fieldZ a: fieldA m: fieldM }"); + assert_eq!( + normalize(&doc, None).unwrap(), + "query{fieldA fieldM fieldZ}" + ); + } + + // === Fragment expansion === + + #[test] + fn fragment_expanded_to_inline() { + let doc = parse( + "query { ...F } + fragment F on Query { a }", + ); + assert_eq!(normalize(&doc, None).unwrap(), 
"query{...on Query{a}}"); + } + + #[test] + fn same_type_fragments_merged() { + // Two fragments on same type get merged into one InlineFragment + let doc = parse( + "query { ...A ...B } + fragment A on Query { a } + fragment B on Query { b }", + ); + assert_eq!(normalize(&doc, None).unwrap(), "query{...on Query{a b}}"); + } + + #[test] + fn different_type_fragments_not_merged() { + let doc = parse( + "query { ...A ...B } + fragment A on TypeA { a } + fragment B on TypeB { b }", + ); + assert_eq!( + normalize(&doc, None).unwrap(), + "query{...on TypeA{a} ...on TypeB{b}}" + ); + } + + #[test] + fn transitive_fragments_expanded() { + let doc = parse( + "query { ...A } + fragment A on T { ...B a } + fragment B on T { ...C b } + fragment C on T { c }", + ); + assert_eq!( + normalize(&doc, None).unwrap(), + "query{...on T{a ...on T{b ...on T{c}}}}" + ); + } + + #[test] + fn unused_fragments_naturally_excluded() { + let doc = parse( + "query { ...Used } + fragment Used on Query { a } + fragment Unused on Query { b }", + ); + assert_eq!(normalize(&doc, None).unwrap(), "query{...on Query{a}}"); + } + + #[test] + fn fragment_spread_directives_merged_with_def_directives() { + let doc = parse( + "query { ...F @skip(if: true) } + fragment F on T @deprecated { a }", + ); + assert_eq!( + normalize(&doc, None).unwrap(), + "query{...on T@deprecated@skip(if:){a}}" + ); + } + + // === Inline fragment handling === + + #[test] + fn bare_inline_fragment_flattened() { + // InlineFragment with no type condition and no directives → flattened + let doc = parse("{ ... { field } }"); + assert_eq!(normalize(&doc, None).unwrap(), "query{field}"); + } + + #[test] + fn inline_fragment_with_directive_preserved() { + // Has directive → not flattened even without type condition + let doc = parse("{ ... 
@include(if: true) { field } }"); + assert_eq!( + normalize(&doc, None).unwrap(), + "query{...@include(if:){field}}" + ); + } + + #[test] + fn inline_fragment_with_type_condition_preserved() { + let doc = parse("{ ... on Query { field } }"); + assert_eq!(normalize(&doc, None).unwrap(), "query{...on Query{field}}"); + } + + #[test] + fn same_type_inline_fragments_merged() { + let doc = parse("query { ... on Query { a } ... on Query { b } }"); + assert_eq!(normalize(&doc, None).unwrap(), "query{...on Query{a b}}"); + } + + #[test] + fn different_directive_inline_fragments_not_merged() { + let doc = parse("query { ... on T @a { x } ... on T @b { y } }"); + assert_eq!( + normalize(&doc, None).unwrap(), + "query{...on T@a{x} ...on T@b{y}}" + ); + } + + // === Sort order: fields first, then inline fragments === + + #[test] + fn selection_sort_order() { + let doc = parse( + "query { + ... on Query { inlined } + ...Frag + field + regular + } + fragment Frag on Query { x }", + ); + // Fields first (alpha), then InlineFragments (alpha by TC) + // Frag expands to ... on Query { x }, merges with ... 
on Query { inlined } + assert_eq!( + normalize(&doc, None).unwrap(), + "query{field regular ...on Query{inlined x}}" + ); + } + + #[test] + fn field_without_directive_before_field_with_directive() { + let doc = parse("query { name @uppercase name }"); + assert_eq!(normalize(&doc, None).unwrap(), "query{name name@uppercase}"); + } + + #[test] + fn fields_tiebreak_by_arguments() { + let doc = parse("query { field(z: 1) field(a: 1) field }"); + assert_eq!( + normalize(&doc, None).unwrap(), + "query{field field(a:) field(z:)}" + ); + } + + #[test] + fn fields_tiebreak_by_directives_after_arguments() { + let doc = parse("query { field(a: 1) @z field(a: 1) @a }"); + assert_eq!( + normalize(&doc, None).unwrap(), + "query{field(a:)@a field(a:)@z}" + ); + } + + // === Directives === + + #[test] + fn directive_sorting() { + let doc = parse("query @z @a @m { field }"); + assert_eq!(normalize(&doc, None).unwrap(), "query@a@m@z{field}"); + } + + #[test] + fn directive_arguments_sorted() { + let doc = parse("{ field @custom(z: 1, a: 2) }"); + assert_eq!(normalize(&doc, None).unwrap(), "query{field@custom(a:,z:)}"); + } + + #[test] + fn repeated_directives_preserved() { + let doc = parse( + r#"query { products @filter(field: "price", op: "gt", value: "10") @filter(field: "category", op: "eq", value: "electronics") { id } }"#, + ); + assert_eq!( + normalize(&doc, None).unwrap(), + "query{products@filter(field:,op:,value:)@filter(field:,op:,value:){id}}" + ); + } + + // === Operation types === + + #[test] + fn mutation() { + let doc = parse("mutation { doThing }"); + assert_eq!(normalize(&doc, None).unwrap(), "mutation{doThing}"); + } + + #[test] + fn subscription() { + let doc = parse("subscription { onEvent }"); + assert_eq!(normalize(&doc, None).unwrap(), "subscription{onEvent}"); + } + + // === Error cases === + + #[test] + fn error_operation_not_found() { + let doc = parse("query Foo { a }"); + assert_eq!( + normalize(&doc, Some("Bar")), + 
Err(SignatureError::OperationNotFound("Bar".to_string())) + ); + } + + #[test] + fn error_ambiguous_operation() { + let doc = parse("query A { a } query B { b }"); + assert_eq!( + normalize(&doc, None), + Err(SignatureError::AmbiguousOperation) + ); + } + + #[test] + fn error_no_operations() { + let doc = parse("fragment F on Query { a }"); + assert_eq!(normalize(&doc, None), Err(SignatureError::NoOperations)); + } + + #[test] + fn named_operation_selection() { + let doc = parse("query A { a } query B { b }"); + assert_eq!(normalize(&doc, Some("B")).unwrap(), "query{b}"); + } + + // === Signature hash === + + #[test] + fn signature_hash() { + let doc = parse("{ field }"); + let normalized = normalize(&doc, None).unwrap(); + assert_eq!(normalized, "query{field}"); + let sig = signature(&doc, None).unwrap(); + let expected = blake3::hash(b"query{field}").to_hex().to_string(); + assert_eq!(sig, expected); + } + + // === Determinism === + + #[test] + fn deterministic() { + let input = "query @dir { b a field(x: 1) }"; + let doc = parse(input); + assert_eq!( + normalize(&doc, None).unwrap(), + normalize(&doc, None).unwrap() + ); + } + + // === Canonical form: different representations → same hash === + + #[test] + fn fragments_vs_inline_same_hash() { + let with_fragment = parse( + "query { ...F } + fragment F on T { a b }", + ); + let with_inline = parse("query { ... 
on T { a b } }"); + assert_eq!( + normalize(&with_fragment, None).unwrap(), + normalize(&with_inline, None).unwrap(), + ); + } + + #[test] + fn alias_vs_no_alias_same_hash() { + let with_alias = parse("{ myAlias: field }"); + let without_alias = parse("{ field }"); + assert_eq!( + normalize(&with_alias, None).unwrap(), + normalize(&without_alias, None).unwrap(), + ); + } + + #[test] + fn different_values_same_hash() { + let a = parse(r#"{ field(arg: "hello") }"#); + let b = parse(r#"{ field(arg: "world") }"#); + assert_eq!(normalize(&a, None).unwrap(), normalize(&b, None).unwrap(),); + } + + #[test] + fn different_variable_names_same_hash() { + let a = parse("query($foo: String) { field(arg: $foo) }"); + let b = parse(r#"query($bar: String! = "Thing") { field(arg: $bar) }"#); + assert_eq!(normalize(&a, None).unwrap(), normalize(&b, None).unwrap(),); + } + + #[test] + fn reordered_inline_fragments_same_hash() { + let a = parse("query { ... on Query { a } ... on Query { b } }"); + let b = parse("query { ... on Query { b } ... on Query { a } }"); + assert_eq!(normalize(&a, None), normalize(&b, None)); + } + + // === Edge cases === + + #[test] + fn fragment_cycle_detected_and_skipped() { + let doc = parse( + "query { ...A } + fragment A on T { field ...B } + fragment B on T { other ...A }", + ); + let result = normalize(&doc, None).unwrap(); + // Second ...A is a cycle — silently skipped + assert_eq!(result, "query{...on T{field ...on T{other}}}"); + } + + #[test] + fn bare_inline_fragment_flattened_then_coexists_with_typed() { + let doc = parse("query { ... { a } ... on Query { b } }"); + // Bare fragment flattened to field `a`, not merged with typed fragment + assert_eq!(normalize(&doc, None).unwrap(), "query{a ...on Query{b}}"); + } + + #[test] + fn expanded_fragment_merges_with_existing_inline() { + let doc = parse( + "query { ... on T @a { x } ...F } + fragment F on T @a { y }", + ); + // F expands to ... on T @a { y }, merges with existing ... 
on T @a { x } + assert_eq!(normalize(&doc, None).unwrap(), "query{...on T@a{x y}}"); + } + + #[test] + fn operation_directives_with_arguments() { + let doc = parse("query @z(x: 1) @a(b: 2, c: 3) @m { field }"); + assert_eq!( + normalize(&doc, None).unwrap(), + "query@a(b:,c:)@m@z(x:){field}" + ); + } + + #[test] + fn inline_fragment_with_multiple_directives() { + let doc = parse("query { ... on User @skip(if: true) @include(if: false) { name } }"); + assert_eq!( + normalize(&doc, None).unwrap(), + "query{...on User@include(if:)@skip(if:){name}}" + ); + } + + #[test] + fn fragment_spread_multiple_directives_on_both_spread_and_def() { + let doc = parse( + "query { ...F @b @a } + fragment F on T @d @c { field }", + ); + // Spread directives [@b, @a] + def directives [@d, @c] → merged and sorted + assert_eq!( + normalize(&doc, None).unwrap(), + "query{...on T@a@b@c@d{field}}" + ); + } + + #[test] + fn three_same_type_inline_fragments_merged() { + let doc = parse("query { ... on T { a } ... on T { b } ... on T { c } }"); + assert_eq!(normalize(&doc, None).unwrap(), "query{...on T{a b c}}"); + } + + #[test] + fn three_same_type_fragments_with_interleaved_other() { + // IF_A, IF_B, IF_A2, IF_A3 — tests swap_remove doesn't skip elements + let doc = parse( + "query { + ... on T { a } + ... on Other { x } + ... on T { b } + ... on T { c } + }", + ); + assert_eq!( + normalize(&doc, None).unwrap(), + "query{...on Other{x} ...on T{a b c}}" + ); + } + + #[test] + fn inline_fragment_sort_multiple_type_conditions() { + let doc = parse( + "query { + ... on Zebra { z } + ... on Admin { a } + ... on Middle { m } + }", + ); + assert_eq!( + normalize(&doc, None).unwrap(), + "query{...on Admin{a} ...on Middle{m} ...on Zebra{z}}" + ); + } +} diff --git a/bluejay-operation-normalizer/src/normalize.rs b/bluejay-operation-normalizer/src/normalize.rs new file mode 100644 index 0000000..b0eaf13 --- /dev/null +++ b/bluejay-operation-normalizer/src/normalize.rs @@ -0,0 +1,62 @@ +//! 
Orchestrates the normalization pipeline (algorithm steps 1–3). +//! +//! 1. Resolves the target operation. +//! 2. Delegates to [`build::build_selections`] for IR construction + normalization. +//! 3. Delegates to [`serialize::serialize`] for canonical string output. + +use bluejay_core::executable::{ExecutableDocument, FragmentDefinition, OperationDefinition}; +use bumpalo::Bump; + +use crate::build::{build_directives, build_selections}; +use crate::serialize::serialize; +use crate::SignatureError; + +/// Entry point: resolve the operation, build normalized IR, serialize to canonical string. +pub(crate) fn normalize_doc( + doc: &E, + op_name: Option<&str>, +) -> Result { + let operation = resolve_operation::(doc, op_name)?; + let op_ref = operation.as_ref(); + + let bump = Bump::new(); + + let fragment_defs: Vec<(&str, &E::FragmentDefinition)> = + doc.fragment_definitions().map(|f| (f.name(), f)).collect(); + + let op_directives = build_directives::(op_ref.directives(), &bump); + + let mut expanding = Vec::new(); + let selections = build_selections::( + op_ref.selection_set(), + &fragment_defs, + &mut expanding, + &bump, + ); + + Ok(serialize( + op_ref.operation_type(), + &op_directives, + &selections, + )) +} + +fn resolve_operation<'a, E: ExecutableDocument>( + doc: &'a E, + op_name: Option<&str>, +) -> Result<&'a E::OperationDefinition, SignatureError> { + match op_name { + Some(name) => doc + .operation_definitions() + .find(|op| op.as_ref().name() == Some(name)) + .ok_or_else(|| SignatureError::OperationNotFound(name.to_string())), + None => { + let mut iter = doc.operation_definitions(); + let first = iter.next().ok_or(SignatureError::NoOperations)?; + if iter.next().is_some() { + return Err(SignatureError::AmbiguousOperation); + } + Ok(first) + } + } +} diff --git a/bluejay-operation-normalizer/src/serialize.rs b/bluejay-operation-normalizer/src/serialize.rs new file mode 100644 index 0000000..26c79b8 --- /dev/null +++ 
b/bluejay-operation-normalizer/src/serialize.rs @@ -0,0 +1,83 @@ +//! Serializes the normalized IR to a compact canonical string (algorithm step 3). +//! +//! Output format: +//! - Operation type keyword, no operation name, no variable definitions. +//! - Argument and directive values are omitted, leaving only `name:` (matching +//! the [ArgumentCoordinate](https://spec.graphql.org/draft/#ArgumentCoordinate) format). +//! - No whitespace except single spaces separating selections within `{ }`. +//! - Example: `query{field(a:,b:)@dir{nested}}` + +use bluejay_core::OperationType; + +use crate::ir::{ + NormalizedDirective, NormalizedField, NormalizedInlineFragment, NormalizedSelection, +}; + +/// Serialize a normalized operation to its canonical string form (step 3). +pub(crate) fn serialize( + op_type: OperationType, + directives: &[NormalizedDirective<'_, '_>], + selections: &[NormalizedSelection<'_, '_>], +) -> String { + let mut out = String::with_capacity(256); + out.push_str(op_type.as_ref()); + write_directives(&mut out, directives); + write_selection_set(&mut out, selections); + out +} + +fn write_selection_set(out: &mut String, selections: &[NormalizedSelection<'_, '_>]) { + out.push('{'); + for (i, sel) in selections.iter().enumerate() { + if i > 0 { + out.push(' '); + } + match sel { + NormalizedSelection::Field(f) => write_field(out, f), + NormalizedSelection::InlineFragment(inf) => write_inline_fragment(out, inf), + } + } + out.push('}'); +} + +fn write_field(out: &mut String, field: &NormalizedField<'_, '_>) { + out.push_str(field.name); + write_arguments(out, &field.arg_names); + write_directives(out, &field.directives); + if !field.selections.is_empty() { + write_selection_set(out, &field.selections); + } +} + +fn write_inline_fragment(out: &mut String, inf: &NormalizedInlineFragment<'_, '_>) { + out.push_str("..."); + if let Some(tc) = inf.type_condition { + out.push_str("on "); + out.push_str(tc); + } + write_directives(out, &inf.directives); + 
write_selection_set(out, &inf.selections); +} + +fn write_directives(out: &mut String, directives: &[NormalizedDirective<'_, '_>]) { + for dir in directives { + out.push('@'); + out.push_str(dir.name); + write_arguments(out, &dir.arg_names); + } +} + +fn write_arguments(out: &mut String, arg_names: &[&str]) { + if arg_names.is_empty() { + return; + } + out.push('('); + for (i, name) in arg_names.iter().enumerate() { + if i > 0 { + out.push(','); + } + out.push_str(name); + out.push(':'); + } + out.push(')'); +} diff --git a/bluejay-parser/src/ast/executable/executable_document.rs b/bluejay-parser/src/ast/executable/executable_document.rs index 78a7faa..419f1d7 100644 --- a/bluejay-parser/src/ast/executable/executable_document.rs +++ b/bluejay-parser/src/ast/executable/executable_document.rs @@ -192,7 +192,7 @@ mod tests { .result .unwrap_err(); - assert_eq!(1, errors.len(), "{:?}", errors); + assert_eq!(1, errors.len(), "{errors:?}"); assert_eq!("Max depth exceeded", errors[0].message()); diff --git a/bluejay-typegen-codegen/src/executable_definition/intermediate_representation.rs b/bluejay-typegen-codegen/src/executable_definition/intermediate_representation.rs index 6c7f1a9..725a282 100644 --- a/bluejay-typegen-codegen/src/executable_definition/intermediate_representation.rs +++ b/bluejay-typegen-codegen/src/executable_definition/intermediate_representation.rs @@ -219,11 +219,11 @@ impl ExecutableStruct<'_> { fn prefix_for_schema_definition_module(&self) -> impl Iterator { // root is one level higher than the executable/query module - std::iter::repeat(Default::default()).take(self.depth + 1) + std::iter::repeat_n(Default::default(), self.depth + 1) } fn prefix_for_executable_document_module(&self) -> impl Iterator { - std::iter::repeat(Default::default()).take(self.depth) + std::iter::repeat_n(Default::default(), self.depth) } } @@ -257,7 +257,7 @@ impl ExecutableEnum<'_> { pub enum WrappedExecutableType<'a> { /// a required type, unless wrapped in an `Optional` 
- Base(ExecutableType<'a>), + Base(Box>), /// an optional type Optional(Box>), /// a list type, required unless wrapped in an `Optional` @@ -533,13 +533,13 @@ impl<'a, E: ExecutableDocument, S: SchemaDefinition, C: CodeGenerator> } } OutputTypeReference::Base(inner, required) => { - let base_type = WrappedExecutableType::Base(self.build_base_type( + let base_type = WrappedExecutableType::Base(Box::new(self.build_base_type( field.response_name(), field.selection_set(), inner, depth, path, - )); + ))); if required { base_type } else { diff --git a/bluejay-typegen-codegen/src/input.rs b/bluejay-typegen-codegen/src/input.rs index 373f725..d368cb7 100644 --- a/bluejay-typegen-codegen/src/input.rs +++ b/bluejay-typegen-codegen/src/input.rs @@ -57,7 +57,7 @@ impl DocumentInput { let file_path = base_path.join(filename.value()); std::fs::read_to_string(file_path) - .map_err(|err| syn::Error::new(filename.span(), format!("{}", err))) + .map_err(|err| syn::Error::new(filename.span(), format!("{err}"))) } } @@ -117,7 +117,7 @@ pub(crate) fn parse_key_value_with( if value.is_some() { return Err(syn::Error::new( key.span(), - format!("Duplicate entry for `{}`", key), + format!("Duplicate entry for `{key}`"), )); } diff --git a/bluejay-validator/src/executable/operation/analyzers/input_size.rs b/bluejay-validator/src/executable/operation/analyzers/input_size.rs index 8468bc1..ae85185 100644 --- a/bluejay-validator/src/executable/operation/analyzers/input_size.rs +++ b/bluejay-validator/src/executable/operation/analyzers/input_size.rs @@ -96,7 +96,7 @@ fn find_input_size_offenders_arguments< offenders, variable_values, variable_definitions, - format!("{}.{}", argument_name, index), + format!("{argument_name}.{index}"), item, ); }) @@ -170,7 +170,7 @@ fn find_input_size_offenders_variables( max_length, offenders, - format!("{}.{}", argument_name, index), + format!("{argument_name}.{index}"), item, ); }) diff --git 
a/bluejay-validator/src/executable/operation/analyzers/variable_values_are_valid.rs b/bluejay-validator/src/executable/operation/analyzers/variable_values_are_valid.rs index 014c6df..c89dbdd 100644 --- a/bluejay-validator/src/executable/operation/analyzers/variable_values_are_valid.rs +++ b/bluejay-validator/src/executable/operation/analyzers/variable_values_are_valid.rs @@ -209,13 +209,7 @@ mod tests { "#, None, &serde_json::json!({}), - |errors| { - assert!( - errors.is_empty(), - "Expected errors to be empty: {:?}", - errors - ) - }, + |errors| assert!(errors.is_empty(), "Expected errors to be empty: {errors:?}",), ); validate_variable_values( r#" @@ -244,13 +238,7 @@ mod tests { "#, None, &serde_json::json!({}), - |errors| { - assert!( - errors.is_empty(), - "Expected errors to be empty: {:?}", - errors - ) - }, + |errors| assert!(errors.is_empty(), "Expected errors to be empty: {errors:?}",), ); validate_variable_values( r#" @@ -260,13 +248,7 @@ mod tests { "#, None, &serde_json::json!({ "arg": "value" }), - |errors| { - assert!( - errors.is_empty(), - "Expected errors to be empty: {:?}", - errors - ) - }, + |errors| assert!(errors.is_empty(), "Expected errors to be empty: {errors:?}",), ); validate_variable_values( r#" @@ -276,13 +258,7 @@ mod tests { "#, None, &serde_json::json!({ "arg": null }), - |errors| { - assert!( - errors.is_empty(), - "Expected errors to be empty: {:?}", - errors - ) - }, + |errors| assert!(errors.is_empty(), "Expected errors to be empty: {errors:?}",), ); validate_variable_values( r#" @@ -311,13 +287,7 @@ mod tests { "#, None, &serde_json::json!({ "arg": "value" }), - |errors| { - assert!( - errors.is_empty(), - "Expected errors to be empty: {:?}", - errors - ) - }, + |errors| assert!(errors.is_empty(), "Expected errors to be empty: {errors:?}",), ); validate_variable_values( r#" @@ -356,13 +326,7 @@ mod tests { "#, None, &serde_json::json!({}), - |errors| { - assert!( - errors.is_empty(), - "Expected errors to be empty: {:?}", - 
errors - ) - }, + |errors| assert!(errors.is_empty(), "Expected errors to be empty: {errors:?}",), ); validate_variable_values( r#" @@ -372,13 +336,7 @@ mod tests { "#, None, &serde_json::json!({}), - |errors| { - assert!( - errors.is_empty(), - "Expected errors to be empty: {:?}", - errors - ) - }, + |errors| assert!(errors.is_empty(), "Expected errors to be empty: {errors:?}",), ); validate_variable_values( r#" @@ -388,13 +346,7 @@ mod tests { "#, None, &serde_json::json!({ "arg": "value" }), - |errors| { - assert!( - errors.is_empty(), - "Expected errors to be empty: {:?}", - errors - ) - }, + |errors| assert!(errors.is_empty(), "Expected errors to be empty: {errors:?}",), ); validate_variable_values( r#" @@ -404,13 +356,7 @@ mod tests { "#, None, &serde_json::json!({ "arg": null }), - |errors| { - assert!( - errors.is_empty(), - "Expected errors to be empty: {:?}", - errors - ) - }, + |errors| assert!(errors.is_empty(), "Expected errors to be empty: {errors:?}",), ); validate_variable_values( r#" diff --git a/bluejay-validator/src/executable/operation/orchestrator.rs b/bluejay-validator/src/executable/operation/orchestrator.rs index bb9b41f..d54d369 100644 --- a/bluejay-validator/src/executable/operation/orchestrator.rs +++ b/bluejay-validator/src/executable/operation/orchestrator.rs @@ -384,7 +384,9 @@ pub enum OperationResolutionError<'a> { impl OperationResolutionError<'_> { pub fn message(&self) -> Cow<'static, str> { match self { - Self::NoOperationWithName { name } => format!("No operation defined with name {}", name).into(), + Self::NoOperationWithName { name } => { + format!("No operation defined with name {name}").into() + } Self::AnonymousNotEligible => "Anonymous operation can only be used when the document contains exactly one operation definition".into(), } } diff --git a/bluejay-validator/src/path.rs b/bluejay-validator/src/path.rs index a877cba..21b95c4 100644 --- a/bluejay-validator/src/path.rs +++ b/bluejay-validator/src/path.rs @@ -21,8 +21,8 
@@ impl From for PathElement<'_> { impl std::fmt::Display for PathElement<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - PathElement::Key(s) => write!(f, "{}", s), - PathElement::Index(i) => write!(f, "{}", i), + PathElement::Key(s) => write!(f, "{s}"), + PathElement::Index(i) => write!(f, "{i}"), } } }