Skip to content

Commit

Permalink
Optimize fetchall() when many rows are fetched (#51)
Browse files Browse the repository at this point in the history
In this case, look up all the type cast functions up front.
This gives more than 30% better performance for large query results.
  • Loading branch information
Cito committed Dec 3, 2020
1 parent 9671b83 commit 76db14d
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 3 deletions.
23 changes: 20 additions & 3 deletions pgdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,11 +854,23 @@ def typecast(self, value, typ):
# for NULL values, no typecast is necessary
return None
cast = self._typecasts[typ]
if not cast or cast is str:
if cast is None or cast is str:
# no typecast is necessary
return value
return cast(value)

def get_row_caster(self, types):
    """Get a typecast function for a complete row of values.

    The cast function for every entry of *types* is looked up once in
    ``self._typecasts``, so that the returned function can be applied
    to many rows without repeating these lookups.

    Columns whose cast function is ``None`` or ``str`` are passed
    through unchanged, and so are ``None`` values in a row.

    The returned function takes one row (an iterable of values in the
    same order as *types*) and returns the cast values as a list.
    """
    typecasts = self._typecasts
    # normalize "no typecast necessary" (str) to None in a single pass
    casts = [None if cast is str else cast
             for cast in (typecasts[typ] for typ in types)]

    def row_caster(row):
        # only call a cast function when there is one and the value is set
        return [value if cast is None or value is None else cast(value)
                for cast, value in zip(casts, row)]

    return row_caster


class _quotedict(dict):
"""Dictionary with auto quoting of its items.
Expand Down Expand Up @@ -1177,10 +1189,15 @@ def fetchmany(self, size=None, keep=False):
raise
except Error as err:
raise _db_error(str(err))
typecast = self.type_cache.typecast
row_factory = self.row_factory
coltypes = self.coltypes
return [row_factory([typecast(value, typ)
if len(result) > 5:
# optimize the case where we really fetch many values
# by looking up all type casting functions upfront
cast_row = self.type_cache.get_row_caster(coltypes)
return [row_factory(cast_row(row)) for row in result]
cast_value = self.type_cache.typecast
return [row_factory([cast_value(value, typ)
for typ, value in zip(coltypes, row)]) for row in result]

def callproc(self, procname, parameters=None):
Expand Down
24 changes: 24 additions & 0 deletions tests/test_dbapi20.py
Original file line number Diff line number Diff line change
Expand Up @@ -1137,6 +1137,30 @@ def test_execute_edge_cases(self):
sql = 'select 1' # cannot be executed after connection is closed
self.assertRaises(pgdb.OperationalError, cur.execute, sql)

def test_fetchall_with_various_sizes(self):
    # fetching is optimized depending on the size of the result,
    # so the same checks are run for small and large numbers of rows
    query = ('select n, n::text as s, n %% 2 = 1 as b'
             ' from generate_series(1, %d) as s(n)')
    con = self._connect()
    try:
        for n in (1, 3, 5, 7, 10, 100, 1000):
            cur = con.cursor()
            try:
                cur.execute(query % n)
                res = cur.fetchall()
                self.assertEqual(len(res), n, res)
                # the series runs from 1 to n, so the first row
                # must hold the number 1 and the last row the number n
                for row, num in ((res[0], 1), (res[-1], n)):
                    self.assertEqual(len(row), 3)
                    self.assertEqual(row.n, num)
                    self.assertEqual(row.s, str(num))
                    self.assertIs(row.b, num % 2 == 1)
            finally:
                cur.close()
    finally:
        con.close()

def test_fetchmany_with_keep(self):
con = self._connect()
try:
Expand Down

0 comments on commit 76db14d

Please sign in to comment.