-
Notifications
You must be signed in to change notification settings - Fork 418
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] Optimize postgresql storage get all fixes 1507 #1615
Changes from 4 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -587,31 +587,24 @@ def get_all(self, collection_id, parent_id, filters=None, sorting=None, | |
modified_field=DEFAULT_MODIFIED_FIELD, | ||
deleted_field=DEFAULT_DELETED_FIELD, | ||
auth=None): | ||
query = """ | ||
WITH collection_filtered AS ( | ||
SELECT id, last_modified, data, deleted | ||
FROM records | ||
WHERE {parent_id_filter} | ||
AND collection_id = :collection_id | ||
{conditions_deleted} | ||
{conditions_filter} | ||
), | ||
total_filtered AS ( | ||
SELECT COUNT(id) AS count_total | ||
FROM collection_filtered | ||
WHERE NOT deleted | ||
), | ||
paginated_records AS ( | ||
SELECT DISTINCT id | ||
FROM collection_filtered | ||
{pagination_rules} | ||
) | ||
SELECT count_total, | ||
a.id, as_epoch(a.last_modified) AS last_modified, a.data | ||
FROM paginated_records AS p JOIN collection_filtered AS a ON (a.id = p.id), | ||
total_filtered | ||
{sorting} | ||
LIMIT :pagination_limit; | ||
count_query = """ | ||
SELECT COUNT(id) AS count_total | ||
FROM records | ||
WHERE {parent_id_filter} | ||
AND collection_id = :collection_id | ||
AND NOT deleted | ||
{conditions_filter}; | ||
""" | ||
select_query = """ | ||
SELECT id, as_epoch(last_modified) AS last_modified, data | ||
FROM records | ||
WHERE {pagination_rules} | ||
{parent_id_filter} | ||
AND collection_id = :collection_id | ||
{conditions_deleted} | ||
{conditions_filter} | ||
{sorting} | ||
LIMIT :pagination_limit; | ||
""" | ||
|
||
# Unsafe strings escaped by PostgreSQL | ||
|
@@ -647,21 +640,24 @@ def get_all(self, collection_id, parent_id, filters=None, sorting=None, | |
if pagination_rules: | ||
sql, holders = self._format_pagination(pagination_rules, id_field, | ||
modified_field) | ||
safeholders['pagination_rules'] = 'WHERE {}'.format(sql) | ||
safeholders['pagination_rules'] = '{} AND'.format(sql) | ||
placeholders.update(**holders) | ||
else: | ||
safeholders['pagination_rules'] = '' | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: superfluous since |
||
|
||
# Limit the number of results (pagination). | ||
limit = min(self._max_fetch_size, limit) if limit else self._max_fetch_size | ||
placeholders['pagination_limit'] = limit | ||
|
||
with self.client.connect(readonly=True) as conn: | ||
result = conn.execute(query.format_map(safeholders), placeholders) | ||
result = conn.execute(select_query.format_map(safeholders), placeholders) | ||
retrieved = result.fetchmany(self._max_fetch_size) | ||
|
||
if len(retrieved) == 0: | ||
return [], 0 | ||
if len(retrieved) == 0: | ||
return [], 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There might be some further work to do here. The reason I didn't use the exact same filter for the COUNT and the SELECT is this test: If that test is wrong, I can rewrite the two queries so that the SELECT and the COUNT use the exact same WHERE clause. Then, if we do that, we can run the COUNT first (slightly faster since it's just an integer) and, if it's 0, skip the SELECT and just return an empty list. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The variables are not named correctly in the tests; instead, it could be: records_and_tombstones, records_count = self.storage.get_all(parent_id='abc',
collection_id='c',
include_deleted=True)
self.assertEqual(records_count, 0)
self.assertEqual(len(records_and_tombstones), 2) The returned count is the number of records, excluding tombstones. Tombstones are returned when `include_deleted=True` is passed. The parameter |
||
|
||
count_total = retrieved[0]['count_total'] | ||
result_count = conn.execute(count_query.format_map(safeholders), placeholders) | ||
count_total, = result_count.fetchone() | ||
|
||
records = [] | ||
for result in retrieved: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: it only applies to PostgreSQL ;)
And also, I think it would make sense to mention the performance gain!