Substantial speedups for the __eq__ of EqHash.

Do this by unrolling the loop and generating code, eliminating all the conditionals as well.

Before:
Avg Base eq 0.762074232101 stddev 0.00893669830878
Avg Child eq 1.46989099185 stddev 0.0260021505811
Avg Super eq 0.776515642802 stddev 0.011819047442
Avg Super2 eq 0.2257057031 stddev 0.0025750486944
Avg many eq 1.56614136696 stddev 0.0195022584734

After:
Avg Base eq 0.410983006159 stddev 0.00708241719015
Avg Child eq 0.903119166692 stddev 0.0051626944104
Avg Super eq 0.41703470548 stddev 0.00604558003878
Avg Super2 eq 0.208957354228 stddev 0.00508863378261
Avg many eq 0.797417243322 stddev 0.0198358058579
OpenNTI · Sep 9, 2016 · e8fc0d2 · e8fc0d2 · papachoco · Sep 9, 2016
1 parent acfbf04
commit e8fc0d2
Show file tree

Hide file tree

Showing 4 changed files with 125 additions and 42 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -9,7 +9,7 @@
 ==================
 
 - Substantial speedups to the hash functions generated by ``EqHash``.
-
+- Substantial speedups to the equality functions generated by ``EqHash``.
 
 1.1.0 (2016-07-29)
 ==================

diff --git a/setup.py b/setup.py
@@ -41,6 +41,7 @@ def _read(fname):
     include_package_data=True,
     zip_safe=True,
     install_requires=[
+        'six',
         'setuptools',
         'zope.schema',
         'zope.i18n',

diff --git a/src/nti/schema/eqhash.py b/src/nti/schema/eqhash.py
@@ -11,6 +11,8 @@
 
 import operator
 
+import six
+
 def _superhash_force(value):
     # Called when we know that we can't hash the value.
     # Dict?
@@ -102,52 +104,50 @@ def x(cls):
         return cls
     return x
 
+def _make_eq(cls, names, include_super, include_type):
+    # 1 and 0 are constants and faster to load than the globals True/False
+    # (in python 2)
+
+    eq_stmt = 'def __eq__(self, other'
+    if include_type or include_super:
+        # capture the type
+        eq_stmt += ', cls=cls'
+    eq_stmt += '):\n'
+    eq_stmt += '    if self is other: return 1\n'
+    if include_type:
+        eq_stmt += '    if not isinstance(other, cls): return 0\n'
+    if include_super:
+        eq_stmt += '    s = super(cls, self).__eq__(other)\n'
+        eq_stmt += '    if s is NotImplemented or not s: return s\n'
+
+    # We take these one at a time (rather than using
+    # operator.attrgetter). In the cases where some attributes
+    # are computed, this can be more efficient if we discover
+    # a mismatch early. Also, it lets us easily distinguish
+    # between an AttributeError on self (which is a
+    # programming error in calling EqHash) or the other object
+    for name in names:
+        eq_stmt += '    a = self.' + name + '\n'
+        eq_stmt += '    try:\n        b = other.' + name + '\n'
+        eq_stmt += '    except AttributeError: return NotImplemented\n'
+        eq_stmt += '    if a != b: return 0\n\n'
+
+    eq_stmt += '    return 1'
+
+    # Must use a custom dictionary under Py3
+    lcls = dict(locals())
+    six.exec_(eq_stmt, globals(), lcls)
+
+    return lcls['__eq__']
+
 def _eq_hash(cls, names, include_super, include_type, superhash): # pylint:disable=I0011,W0622,R0912
     names = tuple((str(x) for x in names)) # make sure they're native strings, not unicode on Py2
     # We assume the class hierarchy of these objects does not change
     if include_super:
         superclass = cls.__mro__[1]
-        superclass_eq = superclass.__eq__
         superclass_hash = superclass.__hash__
 
-    # 1 and 0 are constants and faster to load than the globals True/False
-    # (in python 2)
-
-    def __eq__(self, other):
-        if self is other:
-            return 1
-
-        if include_type:
-            if not isinstance(other, cls):
-                return 0
-
-        if include_super:
-            s = superclass_eq(self, other)
-            if s is NotImplemented or not s:
-                return s
-
-
-        # We take these one at a time (rather than using
-        # operator.attrgetter). In the cases where some attributes
-        # are computed, this can be more efficient if we discover
-        # a mismatch early. Also, it lets us easily distinguish
-        # between an AttributeError on self (which is a
-        # programming error in calling EqHash) or the other object
-        _ga = getattr # load_fast in the loop
-        for name in names:
-            my_val = _ga(self, name)
-            try:
-                other_val =_ga(other, name)
-            except AttributeError:
-                return NotImplemented
-            else:
-                # Amusingly, even though this does
-                # more push/pop than inlining in the
-                # body of the try block, it benchmarks
-                # faster
-                if my_val != other_val:
-                    return 0
-        return 1
+    __eq__ = _make_eq(cls, names, include_super, include_type)
 
     def __ne__(self, other):
         eq = __eq__(self, other)
@@ -228,7 +228,6 @@ def _hash(values):
         # a tuple of values.
         _hash = hash
 
-
     # Unlike __eq__, we use operator.attrgetter because we're always
     # going to request all the names. In tests, this is ~30% faster than
     # a manual loop (for two to three names).
@@ -237,10 +236,11 @@ def _hash(values):
         # though, it needs at least one name. Make sure to return a tuple for
         # consistency.
         def attrgetter(_):
-            return 42,
+            return ()
     else:
         # This will return a tuple of the values of the names.
         attrgetter = operator.attrgetter(*names)
+
     def __hash__(self):
         h = seed
         if include_super:

diff --git a/src/nti/schema/tests/test_eqhash.py b/src/nti/schema/tests/test_eqhash.py
@@ -55,6 +55,15 @@ class ChildThingNoSuper(Thing):
 class ChildThingNoNames(Thing):
     pass
 
+@EqHash('a', 'b', 'c', 'd', 'e', 'f')
+class ManyThing(object):
+    a = 'a'
+    b = 'b'
+    c = 'c'
+    d = 'd'
+    e = 'e'
+    f = 'f'
+
 class TestEqHash(unittest.TestCase):
 
     def test_eq_hash(self):
@@ -224,9 +233,82 @@ def bench_hash():
     print("Avg Super2  hash", statistics.mean(times), "stddev", statistics.stdev(times))
 
 
+def bench_eq():
+    import timeit
+    import statistics
+
+
+    timer = timeit.Timer('thing == thing2', 'from nti.schema.tests.test_eqhash import Thing as Thing; thing=Thing(); thing2 = Thing()')
+    times = timer.repeat()
+    print("Avg Base  eq", statistics.mean(times), "stddev", statistics.stdev(times))
+
+    timer = timeit.Timer('thing == thing2', 'from nti.schema.tests.test_eqhash import ChildThing as Thing; thing=Thing(); thing2 = Thing()')
+    times = timer.repeat()
+    print("Avg Child eq", statistics.mean(times), "stddev", statistics.stdev(times))
+
+    timer = timeit.Timer('thing == thing2', 'from nti.schema.tests.test_eqhash import Thing2 as Thing; thing=Thing(); thing2 = Thing()')
+    times = timer.repeat()
+    print("Avg Super  eq", statistics.mean(times), "stddev", statistics.stdev(times))
+
+    timer = timeit.Timer('thing == thing', 'from nti.schema.tests.test_eqhash import Thing2 as Thing; thing=Thing(a={}); thing2 = Thing(a={})')
+#    import cProfile
+#    import pstats
+#    pr = cProfile.Profile()
+#    pr.enable()
+    times = timer.repeat()
+#    pr.disable()
+#    ps = pstats.Stats(pr).sort_stats('cumulative')
+#    ps.print_stats(.4)
+
+
+
+
+    print("Avg Super2  eq", statistics.mean(times), "stddev", statistics.stdev(times))
+
+    timer = timeit.Timer('thing == thing2', 'from nti.schema.tests.test_eqhash import ManyThing as Thing; thing=Thing(); thing2 = Thing()')
+    times = timer.repeat()
+    print("Avg many  eq", statistics.mean(times), "stddev", statistics.stdev(times))
+
+
+# Before
+#
+# Avg Base  eq 0.790581703186 stddev 0.00709198228224
+# Avg Child eq 1.44241364797 stddev 0.00058100921717
+# Avg Super  eq 0.772551695506 stddev 0.0120497874892
+# Avg Super2  eq 0.230642795563 stddev 0.00981929676758
+
+# Best attrgetter, params as keywords:
+# Avg Base  eq 0.57781457901 stddev 0.00472933000447
+# Avg Child eq 1.13719065984 stddev 0.00751860996924
+# Avg Super  eq 0.576888004939 stddev 0.0073053209526
+# Avg Super2  eq 0.221588929494 stddev 0.00154292380992
+
+# Code generation
+# Avg Base  eq 0.436311562856 stddev 0.0159609115497
+# Avg Child eq 0.93773595492 stddev 0.0244992238919
+# Avg Super  eq 0.443862199783 stddev 0.00816548353324
+# Avg Super2  eq 0.216485659281 stddev 0.00651497010124
+
+## Many attributes
+# Before
+# Avg Base  eq 0.762074232101 stddev 0.00893669830878
+# Avg Child eq 1.46989099185 stddev 0.0260021505811
+# Avg Super  eq 0.776515642802 stddev 0.011819047442
+# Avg Super2  eq 0.2257057031 stddev 0.0025750486944
+# Avg many  eq 1.56614136696 stddev 0.0195022584734
+
+# Code generation
+# Avg Base  eq 0.410983006159 stddev 0.00708241719015
+# Avg Child eq 0.903119166692 stddev 0.0051626944104
+# Avg Super  eq 0.41703470548 stddev 0.00604558003878
+# Avg Super2  eq 0.208957354228 stddev 0.00508863378261
+# Avg many  eq 0.797417243322 stddev 0.0198358058579
+
 if __name__ == '__main__':
     import sys
     if '--timehash' in sys.argv:
         bench_hash()
+    elif '--timeeq' in sys.argv:
+        bench_eq()
     else:
         unittest.main()