Skip to content

Commit dd04d95

Browse files
committed
Add memoize decorator to improve performance.
The bottleneck for the `make_subplots` function was due to excessive lookups in the `plot_schema`. These lookups are actually pretty computation-intensive so caching these computations can give us a large performance boost. Note that Python 3.2+ has a new [`functools.lru_cache`](https://docs.python.org/3/library/functools.html#functools.lru_cache) which can be used for this. HOWEVER, we support Python 2.7+ and I didn’t see a backport for it. There are numerous `memoize` packages on PyPI, so many that I didn’t want to commit to one. It’s fairly simple to write this and then we don’t need another dependency.
1 parent adce1bb commit dd04d95

File tree

4 files changed

+146
-2
lines changed

4 files changed

+146
-2
lines changed

plotly/tests/test_core/test_utils/test_utils.py

+98-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from __future__ import absolute_import
22

3+
from inspect import getargspec
34
from unittest import TestCase
45

56
from requests.compat import json as _json
67

7-
from plotly.utils import PlotlyJSONEncoder, get_by_path, node_generator
8+
from plotly.utils import (PlotlyJSONEncoder, get_by_path, memoize,
9+
node_generator)
810

911

1012
class TestJSONEncoder(TestCase):
@@ -50,3 +52,98 @@ def test_node_generator(self):
5052
]
5153
for i, item in enumerate(node_generator(node0)):
5254
self.assertEqual(item, expected_node_path_tuples[i])
55+
56+
57+
class TestMemoizeDecorator(TestCase):
    """Checks for the `memoize` decorator: caching, eviction, metadata."""

    # Python 2 has no `nonlocal` statement, so a nested function cannot rebind
    # a counter defined in the enclosing test method. Incrementing an
    # attribute on an instance of this class works on both Python 2 and 3.
    class Namespace(object):
        pass

    def test_memoize(self):
        counter = self.Namespace()
        counter.call_count = 0

        @memoize()
        def add(a, b):
            counter.call_count += 1
            return a + b

        cases = [[(1, 1), 2], [(2, 3), 5], [(3, -3), 0]]

        # Decorating alone must not invoke the wrapped function.
        self.assertEqual(counter.call_count, 0)

        expected_calls = 0
        for inputs, result in cases:
            # Each distinct input costs exactly one real call, no matter how
            # many times it is repeated afterwards.
            expected_calls += 1
            for _ in range(10):
                self.assertEqual(add(*inputs), result)
                self.assertEqual(counter.call_count, expected_calls)

    def test_memoize_maxsize(self):
        counter = self.Namespace()
        counter.call_count = 0

        maxsize = 10

        @memoize(maxsize=maxsize)
        def identity(a):
            counter.call_count += 1
            return a

        # Fill the cache: every first-time input is a real call.
        for value in range(maxsize):
            self.assertEqual(identity(value), value)
            self.assertEqual(counter.call_count, value + 1)

        # The cache is exactly full, so repeating the inputs costs nothing.
        for value in range(maxsize):
            self.assertEqual(identity(value), value)
            self.assertEqual(counter.call_count, maxsize)

        # One more distinct input is a real call...
        self.assertEqual(identity(maxsize), maxsize)
        self.assertEqual(counter.call_count, maxsize + 1)

        # ...and it displaced only the very first input; the rest still hit.
        for value in range(1, maxsize + 1):
            self.assertEqual(identity(value), value)
            self.assertEqual(counter.call_count, maxsize + 1)

        # Replaying from the start now misses every time: each miss evicts
        # the input that is about to be requested next.
        for value in range(maxsize):
            self.assertEqual(identity(value), value)
            self.assertEqual(counter.call_count, maxsize + 1 + value + 1)

    def test_memoize_maxsize_none(self):
        counter = self.Namespace()
        counter.call_count = 0

        @memoize(maxsize=None)
        def identity(a):
            counter.call_count += 1
            return a

        # With no size limit, every first-time input is a real call.
        for value in range(400):
            self.assertEqual(identity(value), value)
            self.assertEqual(counter.call_count, value + 1)

        # Nothing may have been discarded, so no further calls happen.
        for value in range(400):
            self.assertEqual(identity(value), value)
            self.assertEqual(counter.call_count, 400)

    def test_memoize_function_info(self):
        # The wrapped function's docstring, name and argument specification
        # must still be visible on the decorated function.

        @memoize()
        def foo(a, b, c='see?'):
            """Foo is foo."""
            pass

        spec = getargspec(foo)
        self.assertEqual(foo.__doc__, 'Foo is foo.')
        self.assertEqual(foo.__name__, 'foo')
        self.assertEqual(spec.args, ['a', 'b', 'c'])
        self.assertEqual(spec.defaults, ('see?',))

plotly/utils.py

+46
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@
1212
import sys
1313
import threading
1414
import decimal
15+
from collections import deque
1516

1617
import pytz
18+
from decorator import decorator
1719
from requests.compat import json as _json
1820

1921
from plotly.optional_imports import get_module
@@ -444,3 +446,47 @@ def set_sharing_and_world_readable(option_set):
444446
option_set['world_readable'] = True
445447
else:
446448
option_set['world_readable'] = False
449+
450+
451+
def _default_memoize_key_function(*args, **kwargs):
452+
"""Factored out in case we want to allow callers to specify this func."""
453+
if kwargs:
454+
# frozenset is used to ensure hashability
455+
return args, frozenset(kwargs.items())
456+
else:
457+
return args
458+
459+
460+
def memoize(maxsize=128):
    """
    Memoize a function by its arguments. Note, if the wrapped function returns
    a mutable result, the caller is responsible for *not* mutating the result
    as it will mutate the cache itself.

    All call arguments must be hashable, because they are combined into the
    dictionary key under which a result is stored. When the cache is full,
    the *oldest inserted* entry is discarded (first-in, first-out); a cache
    hit does not refresh an entry's age.

    :param (int|None) maxsize: Limit the number of cached results. This is a
                               simple way to prevent memory leaks. Setting this
                               to `None` will remember *all* calls. A value of
                               zero or less disables caching. The default of
                               128 matches the default of the Python 3.2
                               `functools.lru_cache` tool.
    :return: A decorator, built with the `decorator` package, to apply to the
             function that should be memoized.

    """
    keys = deque()  # cached keys in insertion order, oldest on the right
    cache = {}
    bounded = maxsize is not None

    def _memoize(*all_args, **kwargs):
        # The wrapped function arrives as the first positional argument; the
        # remaining positionals are the arguments of the actual call.
        func = all_args[0]
        args = all_args[1:]

        # Nothing can be stored in a zero-sized cache. Evicting from the
        # empty deque would raise IndexError, so just call through.
        if bounded and maxsize <= 0:
            return func(*args, **kwargs)

        # `func` is part of the key so that one decorator object applied to
        # several functions never returns another function's result.
        key = (func, _default_memoize_key_function(*args, **kwargs))

        # Test the dict (constant time) rather than scanning the deque.
        if key in cache:
            return cache[key]

        # Compute first: if `func` raises, no cached entry has been lost.
        result = func(*args, **kwargs)

        # `>=` rather than `==`: nested calls made by `func` may already have
        # filled the cache, and the bound must keep being enforced.
        if bounded and len(keys) >= maxsize:
            cache.pop(keys.pop(), None)

        keys.appendleft(key)
        cache[key] = result
        return result

    return decorator(_memoize)

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,5 +45,5 @@ def readme():
4545
'plotly/matplotlylib/mplexporter',
4646
'plotly/matplotlylib/mplexporter/renderers'],
4747
package_data={'plotly': ['package_data/*']},
48-
install_requires=['requests', 'six', 'pytz'],
48+
install_requires=['decorator', 'requests', 'six', 'pytz'],
4949
zip_safe=False)

tox.ini

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ whitelist_externals=
5151
mkdir
5252
deps=
5353
coverage==4.3.1
54+
decorator==4.0.9
5455
mock==2.0.0
5556
nose==1.3.7
5657
requests==2.12.4

0 commit comments

Comments
 (0)