diff --git a/.changeset/fix-limited-query-deduplication.md b/.changeset/fix-limited-query-deduplication.md new file mode 100644 index 000000000..1298aa469 --- /dev/null +++ b/.changeset/fix-limited-query-deduplication.md @@ -0,0 +1,5 @@ +--- +"@tanstack/db": patch +--- + +Fixed incorrect deduplication of limited queries with different where clauses. Previously, a query like `{where: searchFilter, limit: 10}` could be incorrectly deduplicated against a prior query `{where: undefined, limit: 10}`, causing search/filter results to only show cached data. Now, limited queries are only deduplicated when their where clauses are structurally equal. diff --git a/packages/db/src/query/predicate-utils.ts b/packages/db/src/query/predicate-utils.ts index 35d95b98e..3ab7434d2 100644 --- a/packages/db/src/query/predicate-utils.ts +++ b/packages/db/src/query/predicate-utils.ts @@ -802,6 +802,33 @@ export function isPredicateSubset( subset: LoadSubsetOptions, superset: LoadSubsetOptions ): boolean { + // When the superset has a limit, we can only determine subset relationship + // if the where clauses are equal (not just subset relationship). + // + // This is because a limited query only loads a portion of the matching rows. + // A more restrictive where clause might require rows outside that portion. + // + // Example: superset = {where: undefined, limit: 10, orderBy: desc} + // subset = {where: LIKE 'search%', limit: 10, orderBy: desc} + // The top 10 items matching 'search%' might include items outside the overall top 10. + // + // However, if the where clauses are equal, then the subset relationship can + // be determined by orderBy and limit alone: + // Example: superset = {where: status='active', limit: 10, orderBy: desc} + // subset = {where: status='active', limit: 5, orderBy: desc} + // The top 5 active items ARE contained in the top 10 active items. + if (superset.limit !== undefined) { + // For limited supersets, where clauses must be equal + if (!areWhereClausesEqual(subset.where, superset.where)) { + return false + } + return ( + isOrderBySubset(subset.orderBy, superset.orderBy) && + isLimitSubset(subset.limit, superset.limit) + ) + } + + // For unlimited supersets, use the normal subset logic return ( isWhereSubset(subset.where, superset.where) && isOrderBySubset(subset.orderBy, superset.orderBy) && @@ -809,6 +836,23 @@ export function isPredicateSubset( ) } +/** + * Check if two where clauses are structurally equal. + * Used for limited query subset checks where subset relationship isn't sufficient. + */ +function areWhereClausesEqual( + a: BasicExpression | undefined, + b: BasicExpression | undefined +): boolean { + if (a === undefined && b === undefined) { + return true + } + if (a === undefined || b === undefined) { + return false + } + return areExpressionsEqual(a, b) +} + // ============================================================================ // Helper functions // ============================================================================ diff --git a/packages/db/tests/query/predicate-utils.test.ts b/packages/db/tests/query/predicate-utils.test.ts index d2373f1fb..b390ea4bf 100644 --- a/packages/db/tests/query/predicate-utils.test.ts +++ b/packages/db/tests/query/predicate-utils.test.ts @@ -648,7 +648,8 @@ describe(`isLimitSubset`, () => { }) describe(`isPredicateSubset`, () => { - it(`should check all components`, () => { + it(`should check all components for unlimited superset`, () => { + // For unlimited supersets, where-subset logic applies const subset: LoadSubsetOptions = { where: gt(ref(`age`), val(20)), orderBy: [orderByClause(ref(`age`), `asc`)], @@ -660,11 +661,66 @@ describe(`isPredicateSubset`, () => { orderByClause(ref(`age`), `asc`), orderByClause(ref(`name`), `desc`), ], + // No limit - unlimited superset + } + expect(isPredicateSubset(subset, superset)).toBe(true) + }) + + it(`should require equal where clauses for limited supersets`, () => { + // For limited supersets, where clauses must be EQUAL + const sameWhere = gt(ref(`age`), val(10)) + + const subset: LoadSubsetOptions = { + where: sameWhere, + orderBy: [orderByClause(ref(`age`), `asc`)], + limit: 5, + } + const superset: LoadSubsetOptions = { + where: sameWhere, // Same where clause + orderBy: [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + ], limit: 20, } expect(isPredicateSubset(subset, superset)).toBe(true) }) + it(`should return false for limited superset with different where clause`, () => { + // Even if subset's where is more restrictive, it can't be a subset + // of a limited superset with a different where clause. + // The top N items of "age > 20" may not be in the top M items of "age > 10" + const subset: LoadSubsetOptions = { + where: gt(ref(`age`), val(20)), // More restrictive + orderBy: [orderByClause(ref(`age`), `asc`)], + limit: 5, + } + const superset: LoadSubsetOptions = { + where: gt(ref(`age`), val(10)), // Less restrictive but LIMITED + orderBy: [orderByClause(ref(`age`), `asc`)], + limit: 20, + } + // This should be FALSE because the top 5 of "age > 20" + // might include items outside the top 20 of "age > 10" + expect(isPredicateSubset(subset, superset)).toBe(false) + }) + + it(`should return false for limited superset with no where vs subset with where`, () => { + // This is the reported bug case: pagination with search filter + const subset: LoadSubsetOptions = { + where: gt(ref(`age`), val(20)), // Has a filter + orderBy: [orderByClause(ref(`age`), `asc`)], + limit: 10, + } + const superset: LoadSubsetOptions = { + where: undefined, // No filter but LIMITED + orderBy: [orderByClause(ref(`age`), `asc`)], + limit: 10, + } + // The filtered results might include items outside the unfiltered top 10 + expect(isPredicateSubset(subset, superset)).toBe(false) + }) + it(`should return false if where is not subset`, () => { const subset: LoadSubsetOptions = { where: gt(ref(`age`), val(5)), diff --git a/packages/db/tests/query/subset-dedupe.test.ts b/packages/db/tests/query/subset-dedupe.test.ts index 59aa8c6a3..f1c1a956e 100644 --- a/packages/db/tests/query/subset-dedupe.test.ts +++ b/packages/db/tests/query/subset-dedupe.test.ts @@ -153,17 +153,20 @@ describe(`createDeduplicatedLoadSubset`, () => { }, ] + const whereClause = gt(ref(`age`), val(10)) + // First call: age > 10, orderBy age asc, limit 10 await deduplicated.loadSubset({ - where: gt(ref(`age`), val(10)), + where: whereClause, orderBy: orderBy1, limit: 10, }) expect(callCount).toBe(1) - // Second call: age > 20, orderBy age asc, limit 5 (subset) + // Second call: SAME where clause, same orderBy, smaller limit (subset) + // For limited queries, where clauses must be EQUAL for subset relationship const result = await deduplicated.loadSubset({ - where: gt(ref(`age`), val(20)), + where: whereClause, // Same where clause orderBy: orderBy1, limit: 5, }) @@ -171,6 +174,47 @@ describe(`createDeduplicatedLoadSubset`, () => { expect(callCount).toBe(1) // Should not call - subset of first }) + it(`should NOT dedupe limited calls with different where clauses`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset({ + loadSubset: mockLoadSubset, + }) + + const orderBy1: OrderBy = [ + { + expression: ref(`age`), + compareOptions: { + direction: `asc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First call: age > 10, orderBy age asc, limit 10 + await deduplicated.loadSubset({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + limit: 10, + }) + expect(callCount).toBe(1) + + // Second call: DIFFERENT where clause (age > 20) - should NOT be deduped + // even though age > 20 is "more restrictive" than age > 10, + // the top 5 of age > 20 might not be in the top 10 of age > 10 + await deduplicated.loadSubset({ + where: gt(ref(`age`), val(20)), + orderBy: orderBy1, + limit: 5, + }) + expect(callCount).toBe(2) // Should call - different where clause + }) + it(`should call underlying for non-subset limited calls`, async () => { let callCount = 0 const mockLoadSubset = () => { @@ -671,17 +715,20 @@ describe(`createDeduplicatedLoadSubset`, () => { }, ] + const whereClause = gt(ref(`age`), val(10)) + // First limited call await deduplicated.loadSubset({ - where: gt(ref(`age`), val(10)), + where: whereClause, orderBy: orderBy1, limit: 10, }) expect(callCount).toBe(1) - // Second limited call is a subset (stricter where and smaller limit) + // Second limited call is a subset (SAME where clause and smaller limit) + // For limited queries, where clauses must be EQUAL for subset relationship const subsetOptions = { - where: gt(ref(`age`), val(20)), + where: whereClause, // Same where clause orderBy: orderBy1, limit: 5, } @@ -741,4 +788,98 @@ describe(`createDeduplicatedLoadSubset`, () => { expect(onDeduplicate).toHaveBeenCalledWith(subsetOptions) }) }) + + describe(`limited queries with different where clauses`, () => { + // When a query has a limit, only the top N rows (by orderBy) are loaded. + // A subsequent query with a different where clause cannot reuse that data, + // even if the new where clause is "more restrictive", because the filtered + // top N might include rows outside the original unfiltered top N. + + it(`should NOT dedupe when where clause differs on limited queries`, async () => { + let callCount = 0 + const calls: Array = [] + const mockLoadSubset = (options: LoadSubsetOptions) => { + callCount++ + calls.push(options) + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset({ + loadSubset: mockLoadSubset, + }) + + const orderByCreatedAt: OrderBy = [ + { + expression: ref(`created_at`), + compareOptions: { + direction: `desc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First query: top 10 items with no filter + await deduplicated.loadSubset({ + where: undefined, + orderBy: orderByCreatedAt, + limit: 10, + }) + expect(callCount).toBe(1) + + // Second query: top 10 items WITH a filter + // This requires a separate request because the filtered top 10 + // might include items outside the unfiltered top 10 + const searchWhere = and(eq(ref(`title`), val(`test`))) + await deduplicated.loadSubset({ + where: searchWhere, + orderBy: orderByCreatedAt, + limit: 10, + }) + + expect(callCount).toBe(2) + expect(calls[1]?.where).toEqual(searchWhere) + }) + + it(`should dedupe when where clause is identical on limited queries`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset({ + loadSubset: mockLoadSubset, + }) + + const orderByCreatedAt: OrderBy = [ + { + expression: ref(`created_at`), + compareOptions: { + direction: `desc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First query: top 10 items with no filter + await deduplicated.loadSubset({ + where: undefined, + orderBy: orderByCreatedAt, + limit: 10, + }) + expect(callCount).toBe(1) + + // Second query: same where clause (undefined), smaller limit + // The top 5 are contained within the already-loaded top 10 + const result = await deduplicated.loadSubset({ + where: undefined, + orderBy: orderByCreatedAt, + limit: 5, + }) + expect(result).toBe(true) + expect(callCount).toBe(1) + }) + }) }) diff --git a/packages/electric-db-collection/tests/electric-live-query.test.ts b/packages/electric-db-collection/tests/electric-live-query.test.ts index 3a6489454..ece48b667 100644 --- a/packages/electric-db-collection/tests/electric-live-query.test.ts +++ b/packages/electric-db-collection/tests/electric-live-query.test.ts @@ -606,12 +606,12 @@ describe.each([ // Wait for the live query to process await new Promise((resolve) => setTimeout(resolve, 0)) - // With deduplication, the expanded query (limit 6) is NOT a subset of the limited query (limit 2), - // so it will trigger a new requestSnapshot call. However, some of the recursive - // calls may be deduped if they're covered by the union of previous unlimited calls. - // We expect at least 4 calls: 2x for the initial limit 2 and 2x for the initial limit 6. - // TODO: Once we have cursor based pagination with the PK as a tiebreaker, we can reduce this to 2 calls. - expect(mockRequestSnapshot).toHaveBeenCalledTimes(4) + // Limited queries are only deduplicated when their where clauses are equal. + // Both queries have the same where clause (active = true), but the second query + // with limit 6 needs more data than the first query with limit 2 provided. + // The internal query system makes additional requests as it processes the data. + // TODO: Once we have cursor based pagination with the PK as a tiebreaker, we can reduce this. + expect(mockRequestSnapshot).toHaveBeenCalledTimes(6) // Check that first it requested a limit of 2 users (from first query) expect(callArgs(0)).toMatchObject({ @@ -876,12 +876,10 @@ describe(`Electric Collection with Live Query - syncMode integration`, () => { }) ) - // With deduplication, the unlimited where predicate (no where clause) is tracked, - // and subsequent calls for the same unlimited predicate may be deduped. - // After receiving Bob and Charlie, we have 3 users total, which satisfies the limit of 3, - // so no additional requests should be made. - // TODO: Once we have cursor based pagination with the PK as a tiebreaker, we can reduce this to 1 call. - expect(mockRequestSnapshot).toHaveBeenCalledTimes(2) + // For limited queries, only requests with identical where clauses can be deduplicated. + // The internal query system may make additional requests as it processes the data. + // TODO: Once we have cursor based pagination with the PK as a tiebreaker, we can reduce this. + expect(mockRequestSnapshot).toHaveBeenCalledTimes(3) }) it(`should pass correct WHERE clause to requestSnapshot when live query has filters`, async () => { @@ -1053,7 +1051,46 @@ describe(`Electric Collection - loadSubset deduplication`, () => { ) }) - it(`should deduplicate subset loadSubset requests`, async () => { + it(`should deduplicate subset loadSubset requests with same where clause`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync([]) + expect(electricCollection.status).toBe(`ready`) + + // Create a live query with limit 20 + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 10)) + .orderBy(({ user }) => user.age, `asc`) + .limit(20), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + // Create a live query with SAME where clause but smaller limit + // This SHOULD be deduped because where clauses are equal and limit is smaller + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 10)) // Same where clause + .orderBy(({ user }) => user.age, `asc`) + .limit(10), // Smaller limit + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Still only 1 call - the second was deduped (same where, smaller limit) + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + }) + + it(`should NOT deduplicate limited queries with different where clauses`, async () => { const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) simulateInitialSync([]) @@ -1074,22 +1111,23 @@ describe(`Electric Collection - loadSubset deduplication`, () => { expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) - // Create a live query with a subset predicate (age > 20 is subset of age > 10) - // This should be deduped - no additional requestSnapshot call + // Create a live query with a DIFFERENT where clause (even if more restrictive) + // This should NOT be deduped because for limited queries, where clauses must be EQUAL. + // The top 10 of "age > 20" might include rows outside the top 20 of "age > 10". createLiveQueryCollection({ startSync: true, query: (q) => q .from({ user: electricCollection }) - .where(({ user }) => gt(user.age, 20)) + .where(({ user }) => gt(user.age, 20)) // Different where clause .orderBy(({ user }) => user.age, `asc`) .limit(10), }) await new Promise((resolve) => setTimeout(resolve, 0)) - // Still only 1 call - the second was deduped as a subset - expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + // 2 calls - the second was NOT deduped (different where clause with limit) + expect(mockRequestSnapshot).toHaveBeenCalledTimes(2) }) it(`should NOT deduplicate non-subset loadSubset requests`, async () => { @@ -1150,8 +1188,10 @@ describe(`Electric Collection - loadSubset deduplication`, () => { await new Promise((resolve) => setTimeout(resolve, 0)) - // TODO: Once we have cursor based pagination with the PK as a tiebreaker, we can reduce this to 1 call. - expect(mockRequestSnapshot).toHaveBeenCalledTimes(2) + // For limited queries, only requests with identical where clauses can be deduplicated. + // The internal query system may make additional requests as it processes data. + // TODO: Once we have cursor based pagination with the PK as a tiebreaker, we can reduce this. + expect(mockRequestSnapshot).toHaveBeenCalledTimes(3) // Simulate a must-refetch (which triggers truncate and reset) subscriber([{ headers: { control: `must-refetch` } }]) @@ -1160,13 +1200,13 @@ describe(`Electric Collection - loadSubset deduplication`, () => { // Wait for the existing live query to re-request data after truncate await new Promise((resolve) => setTimeout(resolve, 0)) - // The existing live query re-requests its data after truncate (call 2) - // TODO: Once we have cursor based pagination with the PK as a tiebreaker, we can reduce this to 1 call. - expect(mockRequestSnapshot).toHaveBeenCalledTimes(4) + // The existing live query re-requests its data after truncate + // TODO: Once we have cursor based pagination with the PK as a tiebreaker, we can reduce this. + expect(mockRequestSnapshot).toHaveBeenCalledTimes(5) // Create the same live query again after reset // This should NOT be deduped because the reset cleared the deduplication state, - // but it WILL be deduped because the existing live query just made the same request (call 2) + // but it WILL be deduped because the existing live query just made the same request // So creating a different query to ensure we test the reset createLiveQueryCollection({ startSync: true, @@ -1180,9 +1220,9 @@ describe(`Electric Collection - loadSubset deduplication`, () => { await new Promise((resolve) => setTimeout(resolve, 0)) - // Should have 5 calls - the different query triggered a new request - // TODO: Once we have cursor based pagination with the PK as a tiebreaker, we can reduce this to <=3 calls. - expect(mockRequestSnapshot).toHaveBeenCalledTimes(5) + // Should have more calls - the different query triggered a new request + // TODO: Once we have cursor based pagination with the PK as a tiebreaker, we can reduce this. + expect(mockRequestSnapshot).toHaveBeenCalledTimes(6) }) it(`should deduplicate unlimited queries regardless of orderBy`, async () => {