Skip to content

Commit

Permalink
Substantial speedups for the __eq__ of EqHash.
Browse files Browse the repository at this point in the history
Do this by unrolling the loop and generating code, eliminating all the
conditionals as well.

Before:
Avg Base  eq 0.762074232101 stddev 0.00893669830878
Avg Child eq 1.46989099185 stddev 0.0260021505811
Avg Super  eq 0.776515642802 stddev 0.011819047442
Avg Super2  eq 0.2257057031 stddev 0.0025750486944
Avg many  eq 1.56614136696 stddev 0.0195022584734

After:
Avg Base  eq 0.410983006159 stddev 0.00708241719015
Avg Child eq 0.903119166692 stddev 0.0051626944104
Avg Super  eq 0.41703470548 stddev 0.00604558003878
Avg Super2  eq 0.208957354228 stddev 0.00508863378261
Avg many  eq 0.797417243322 stddev 0.0198358058579
  • Loading branch information
jamadden committed Sep 9, 2016
1 parent acfbf04 commit e8fc0d2
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 42 deletions.
2 changes: 1 addition & 1 deletion CHANGES.rst
Expand Up @@ -9,7 +9,7 @@
==================

- Substantial speedups to the hash functions generated by ``EqHash``.

- Substantial speedups to the equality functions generated by ``EqHash``.

1.1.0 (2016-07-29)
==================
Expand Down
1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -41,6 +41,7 @@ def _read(fname):
include_package_data=True,
zip_safe=True,
install_requires=[
'six',
'setuptools',
'zope.schema',
'zope.i18n',
Expand Down
82 changes: 41 additions & 41 deletions src/nti/schema/eqhash.py
Expand Up @@ -11,6 +11,8 @@

import operator

import six

def _superhash_force(value):
# Called when we know that we can't hash the value.
# Dict?
Expand Down Expand Up @@ -102,52 +104,50 @@ def x(cls):
return cls
return x

def _make_eq(cls, names, include_super, include_type):
# 1 and 0 are constants and faster to load than the globals True/False
# (in python 2)

eq_stmt = 'def __eq__(self, other'
if include_type or include_super:
# capture the type
eq_stmt += ', cls=cls'
eq_stmt += '):\n'
eq_stmt += ' if self is other: return 1\n'
if include_type:
eq_stmt += ' if not isinstance(other, cls): return 0\n'
if include_super:
eq_stmt += ' s = super(cls, self).__eq__(other)\n'
eq_stmt += ' if s is NotImplemented or not s: return s\n'

# We take these one at a time (rather than using
# operator.attrgetter). In the cases where some attributes
# are computed, this can be more efficient if we discover
# a mismatch early. Also, it lets us easily distinguish
# between an AttributeError on self (which is a
# programming error in calling EqHash) or the other object
for name in names:
eq_stmt += ' a = self.' + name + '\n'
eq_stmt += ' try:\n b = other.' + name + '\n'
eq_stmt += ' except AttributeError: return NotImplemented\n'
eq_stmt += ' if a != b: return 0\n\n'

eq_stmt += ' return 1'

# Must use a custom dictionary under Py3
lcls = dict(locals())
six.exec_(eq_stmt, globals(), lcls)

return lcls['__eq__']

def _eq_hash(cls, names, include_super, include_type, superhash): # pylint:disable=I0011,W0622,R0912
names = tuple((str(x) for x in names)) # make sure they're native strings, not unicode on Py2
# We assume the class hierarchy of these objects does not change
if include_super:
superclass = cls.__mro__[1]
superclass_eq = superclass.__eq__
superclass_hash = superclass.__hash__

# 1 and 0 are constants and faster to load than the globals True/False
# (in python 2)

def __eq__(self, other):
if self is other:
return 1

if include_type:
if not isinstance(other, cls):
return 0

if include_super:
s = superclass_eq(self, other)
if s is NotImplemented or not s:
return s


# We take these one at a time (rather than using
# operator.attrgetter). In the cases where some attributes
# are computed, this can be more efficient if we discover
# a mismatch early. Also, it lets us easily distinguish
# between an AttributeError on self (which is a
# programming error in calling EqHash) or the other object
_ga = getattr # load_fast in the loop
for name in names:
my_val = _ga(self, name)
try:
other_val =_ga(other, name)
except AttributeError:
return NotImplemented
else:
# Amusingly, even though this does
# more push/pop than inlining in the
# body of the try block, it benchmarks
# faster
if my_val != other_val:
return 0
return 1
__eq__ = _make_eq(cls, names, include_super, include_type)

def __ne__(self, other):
eq = __eq__(self, other)
Expand Down Expand Up @@ -228,7 +228,6 @@ def _hash(values):
# a tuple of values.
_hash = hash


# Unlike __eq__, we use operator.attrgetter because we're always
# going to request all the names. In tests, this is ~30% faster than
# a manual loop (for two to three names).
Expand All @@ -237,10 +236,11 @@ def _hash(values):
# though, it needs at least one name. Make sure to return a tuple for
# consistency.
def attrgetter(_):
return 42,
return ()
else:
# This will return a tuple of the values of the names.
attrgetter = operator.attrgetter(*names)

def __hash__(self):
h = seed
if include_super:
Expand Down
82 changes: 82 additions & 0 deletions src/nti/schema/tests/test_eqhash.py
Expand Up @@ -55,6 +55,15 @@ class ChildThingNoSuper(Thing):
class ChildThingNoNames(Thing):
pass

@EqHash('a', 'b', 'c', 'd', 'e', 'f')
class ManyThing(object):
    """Fixture comparing six attributes, each defaulting to its own name."""

    a, b, c, d, e, f = 'a', 'b', 'c', 'd', 'e', 'f'

class TestEqHash(unittest.TestCase):

def test_eq_hash(self):
Expand Down Expand Up @@ -224,9 +233,82 @@ def bench_hash():
print("Avg Super2 hash", statistics.mean(times), "stddev", statistics.stdev(times))


def bench_eq():
    """
    Time ``==`` for each EqHash fixture class and print mean and stddev.

    Every case is run with ``timeit.Timer.repeat()`` at its defaults and
    reported on one line, in the order listed below.
    """
    import timeit
    import statistics

    # (label, timed statement, setup code) for each benchmark, in run order.
    # NOTE(review): the "Super2" case compares ``thing`` with itself, so it
    # presumably measures the identity fast path -- confirm this is intended.
    cases = (
        ("Avg Base eq",
         'thing == thing2',
         'from nti.schema.tests.test_eqhash import Thing as Thing; thing=Thing(); thing2 = Thing()'),
        ("Avg Child eq",
         'thing == thing2',
         'from nti.schema.tests.test_eqhash import ChildThing as Thing; thing=Thing(); thing2 = Thing()'),
        ("Avg Super eq",
         'thing == thing2',
         'from nti.schema.tests.test_eqhash import Thing2 as Thing; thing=Thing(); thing2 = Thing()'),
        ("Avg Super2 eq",
         'thing == thing',
         'from nti.schema.tests.test_eqhash import Thing2 as Thing; thing=Thing(a={}); thing2 = Thing(a={})'),
        ("Avg many eq",
         'thing == thing2',
         'from nti.schema.tests.test_eqhash import ManyThing as Thing; thing=Thing(); thing2 = Thing()'),
    )

    for label, statement, setup in cases:
        samples = timeit.Timer(statement, setup).repeat()
        print(label, statistics.mean(samples), "stddev", statistics.stdev(samples))


# Before
#
# Avg Base eq 0.790581703186 stddev 0.00709198228224
# Avg Child eq 1.44241364797 stddev 0.00058100921717
# Avg Super eq 0.772551695506 stddev 0.0120497874892
# Avg Super2 eq 0.230642795563 stddev 0.00981929676758

# Best attrgetter, params as keywords:
# Avg Base eq 0.57781457901 stddev 0.00472933000447
# Avg Child eq 1.13719065984 stddev 0.00751860996924
# Avg Super eq 0.576888004939 stddev 0.0073053209526
# Avg Super2 eq 0.221588929494 stddev 0.00154292380992

# Code generation
# Avg Base eq 0.436311562856 stddev 0.0159609115497
# Avg Child eq 0.93773595492 stddev 0.0244992238919
# Avg Super eq 0.443862199783 stddev 0.00816548353324
# Avg Super2 eq 0.216485659281 stddev 0.00651497010124

## Many attributes
# Before
# Avg Base eq 0.762074232101 stddev 0.00893669830878
# Avg Child eq 1.46989099185 stddev 0.0260021505811
# Avg Super eq 0.776515642802 stddev 0.011819047442
# Avg Super2 eq 0.2257057031 stddev 0.0025750486944
# Avg many eq 1.56614136696 stddev 0.0195022584734

# Code generation
# Avg Base eq 0.410983006159 stddev 0.00708241719015
# Avg Child eq 0.903119166692 stddev 0.0051626944104
# Avg Super eq 0.41703470548 stddev 0.00604558003878
# Avg Super2 eq 0.208957354228 stddev 0.00508863378261
# Avg many eq 0.797417243322 stddev 0.0198358058579

if __name__ == '__main__':
    # Script entry point: a benchmark flag on the command line selects the
    # matching timing run; with neither flag the unit tests are run.
    import sys
    if '--timehash' not in sys.argv and '--timeeq' not in sys.argv:
        unittest.main()
    elif '--timehash' in sys.argv:
        bench_hash()
    else:
        bench_eq()

1 comment on commit e8fc0d2

@papachoco
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks pretty good... getattr and the loop sure can be slow

Please sign in to comment.