Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a Hierarchical timer class #96

Merged
merged 15 commits into from
Apr 6, 2020
Merged
33 changes: 33 additions & 0 deletions pyutilib/misc/tests/test_hierarchical_timer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from pyutilib.misc.timing import HierarchicalTimer
import pyutilib.th as unittest


class TestHierarchicalTimer(unittest.TestCase):
def test_hierarchical_timer(self):
timer = HierarchicalTimer()
timer.start_increment('all')
for i in range(10):
timer.start_increment('a')
for i in range(5):
timer.start_increment('aa')
timer.stop_increment('aa')
timer.start_increment('ab')
timer.stop_increment('ab')
timer.stop_increment('a')
timer.start_increment('b')
timer.stop_increment('b')
timer.start_increment('a')
with self.assertRaisesRegex(ValueError, 'all is not the currently active timer. The only timer that can currently be stopped is all.a'):
timer.stop_increment('all')
timer.stop_increment('a')
timer.stop_increment('all')

a_percent = timer.get_relative_percent_time('all.a')
aa_percent = timer.get_relative_percent_time('all.a.aa')
aa_total_percent = timer.get_total_percent_time('all.a.aa')
self.assertAlmostEqual(aa_total_percent, a_percent/100 * aa_percent/100 * 100)
self.assertAlmostEqual(timer.get_num_calls('all.a'), 11)
self.assertAlmostEqual(timer.get_num_calls('all.a.ab'), 10)
self.assertAlmostEqual(timer.get_num_calls('all.a.aa'), 50)
timer.get_total_time('all.b')
print(timer)
302 changes: 302 additions & 0 deletions pyutilib/misc/timing.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,305 @@ def start(self):
_globalTimer = TicTocTimer()
tic = _globalTimer.tic
toc = _globalTimer.toc


class _HierarchicalHelper(object):
def __init__(self):
self.total_time = 0
self.t0 = None
self.n_calls = 0
self.timers = dict()

def start_increment(self):
self.t0 = time.time()

def stop_increment(self):
self.total_time += time.time() - self.t0
self.n_calls += 1

def pprint(self, indent):
s = ''
if len(self.timers) > 0:
max_name_len = 12
for name in self.timers.keys():
if len(name) > max_name_len:
max_name_len = len(name)
name_formatter = '{name:<' + str(max_name_len) + '}'
s += indent
s += (name_formatter +
'{total_time:>15}'
'{num_calls:>15}'
'{time_per_call:>15}'
'{relative_percent:>15}\n').format(name='Identifier',
total_time='Time (s)',
num_calls='# Calls',
time_per_call='Per Call(s)',
relative_percent='% Time')
s += indent + '-' * (max_name_len + 60) + '\n'
other_time = self.total_time
for name, timer in self.timers.items():
s += indent
s += name_formatter.format(name=name)
s += '{0:>15.2e}'.format(timer.total_time)
s += '{0:>15d}'.format(timer.n_calls)
s += '{0:>15.2e}'.format(timer.total_time/timer.n_calls)
s += '{0:>15.1f}%\n'.format(timer.total_time/self.total_time*100)
s += timer.pprint(indent=indent + ' ')
other_time -= timer.total_time
s += indent
s += name_formatter.format(name='other')
s += '{0:>15.2e}'.format(other_time)
s += '{0:>15}'.format('N/A')
s += '{0:>15}'.format('N/A')
s += '{0:>15.1f}%\n'.format(other_time / self.total_time * 100)
return s


class HierarchicalTimer(object):
"""
A class for hierarchical timing.

Examples
--------
>>> from pyutilib.misc.timing import HierarchicalTimer
>>> timer = HierarchicalTimer()
>>> timer.start_increment('all')
>>> for i in range(10):
>>> timer.start_increment('a')
>>> for i in range(5):
>>> timer.start_increment('aa')
>>> timer.stop_increment('aa')
>>> timer.start_increment('ab')
>>> timer.stop_increment('ab')
>>> timer.stop_increment('a')
>>> for i in range(10):
>>> timer.start_increment('b')
>>> timer.stop_increment('b')
>>> timer.stop_increment('all')
>>> print(timer)
Identifier Time (s) # Calls Per Call (s)
---------------------------------------------------------
all 1.29e-01 1 1.29e-01
Identifier Time (s) # Calls Per Call(s) % Time
------------------------------------------------------------------------
a 1.15e-01 10 1.15e-02 89.0%
Identifier Time (s) # Calls Per Call(s) % Time
------------------------------------------------------------------------
aa 6.60e-02 50 1.32e-03 57.3%
ab 4.84e-02 10 4.84e-03 42.0%
other 8.00e-04 N/A N/A 0.7%
b 1.39e-02 10 1.39e-03 10.8%
other 3.49e-04 N/A N/A 0.3%
Copy link
Contributor

@jsiirola jsiirola Apr 4, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you consider changing this format to more closely match the results from the Python profiler? For reference, the profiler generates:

Function                                             called...
                                                         ncalls  tottime  cumtime
{posix.waitpid}                                      ->
sre_compile.py:228(_compile_charset)                 ->     122    0.026    0.065  sre_compile.py:256(_optimize_charset)
                                                            502    0.000    0.000  {method 'append' of 'list' objects}
                                                             20    0.000    0.000  {method 'extend' of 'list' objects}

I am thinking about a format similar to:

Identifier                               ncalls  tottime  percall      %
------------------------------------------------------------------------
all                                           1    0.129    0.129  100.0
    --------------------------------------------------------------------
    a                                        10    0.115    0.115   89.0
        ----------------------------------------------------------------
        aa                                   50    0.066    0.001   57.3
        ab                                   10    0.048    0.005   42.0
        other                               n/a    0.001      n/a    0.7
        ================================================================
    b                                        10    0.014    0.014   10.8
    other                                   n/a    0.000      n/a    0.3
    ====================================================================
========================================================================

...I am happy to draft the patch if you want.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this output way more than what I had. I honestly couldn't come up with anything I really liked, so I just gave up.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can take a stab at it right now.

>>>
>>>
>>> print('a total time: ', timer.get_total_time('all.a'))
a total time: 0.11518359184265137
>>>
>>> print('ab num calls: ', timer.get_num_calls('all.a.ab'))
ab num calls: 10
>>>
>>> print('aa % time: ', timer.get_relative_percent_time('all.a.aa'))
aa % time: 57.28656738043737
>>>
>>> print('aa % of total time: ', timer.get_total_percent_time('all.a.aa'))
aa % of total time: 50.96888018003749

Internal Workings
-----------------
The HierarchicalTimer use a stack to track which timers are active
at any point in time. Additionally, each timer has a dictionary of
timers for its children timers. Consider

>>> timer = HierarchicalTimer()
>>> timer.start_increment('all')
>>> timer.start_increment('a')
>>> timer.start_increment('aa')

After the above code is run, self.stack will be ['all', 'a', 'aa']
and self.timers will have one key, 'all' and one value which will
be a _HierarchicalHelper. The _HierarchicalHelper has its own timers dictionary:

{'a': _HierarchicalHelper}

and so on. This way, we can easily access any timer with something
that looks like the stack. The logic is recursive (although the
code is not).

"""
def __init__(self):
self.stack = list()
self.timers = dict()

def _get_timer(self, identifier, should_exist=False):
"""

This method gets the timer associated with the current state
of self.stack and the specified identifier.

Parameters
----------
identifier: str
The name of the timer
should_exist: bool
The should_exist is True, and the timer does not already
exist, an error will be raised. If should_exist is False, and
the timer does not already exist, a new timer will be made.

Returns
-------
timer: _HierarchicalHelper

"""
parent = self._get_timer_from_stack(self.stack)
if identifier in parent.timers:
return parent.timers[identifier]
else:
if should_exist:
raise RuntimeError('Could not find timer {0}'.format('.'.join(self.stack + [identifier])))
parent.timers[identifier] = _HierarchicalHelper()
return parent.timers[identifier]

def start_increment(self, identifier):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The TicTocTimer uses start() and stop(). Can we make the two APIs consistent?

"""
Start incrementing the timer identified with identifier

Parameters
----------
identifier: str
The name of the timer
"""
timer = self._get_timer(identifier)
timer.start_increment()
self.stack.append(identifier)

def stop_increment(self, identifier):
"""
Stop incrementing the timer identified with identifier

Parameters
----------
identifier: str
The name of the timer
"""
if identifier != self.stack[-1]:
raise ValueError(str(identifier) + ' is not the currently active timer. The only timer that can currently be stopped is ' + '.'.join(self.stack))
self.stack.pop()
timer = self._get_timer(identifier, should_exist=True)
timer.stop_increment()

def __str__(self):
max_name_len = 12
for name in self.timers.keys():
if len(name) > max_name_len:
max_name_len = len(name)
name_formatter = '{name:<' + str(max_name_len) + '}'
s = (name_formatter +
'{total_time:>15}'
'{num_calls:>15}'
'{time_per_call:>15}\n').format(name='Identifier',
total_time='Time (s)',
num_calls='# Calls',
time_per_call='Per Call (s)')
s += '-' * (max_name_len + 45) + '\n'
for name, timer in self.timers.items():
s += name_formatter.format(name=name)
s += '{0:>15.2e}'.format(timer.total_time)
s += '{0:>15d}'.format(timer.n_calls)
s += '{0:>15.2e}\n'.format(timer.total_time/timer.n_calls)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can n_calls ever be 0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, n_calls can never be 0. The timer would just not exist in that case.

s += timer.pprint(indent=' ')
return s

def reset(self):
"""
Completely reset the timer.
"""
self.stack = list()
self.timers = dict()

def _get_timer_from_stack(self, stack):
"""
This method gets the timer associated with stack.

Parameters
----------
stack: list of str
A list of identifiers.

Returns
-------
timer: _HierarchicalHelper
"""
tmp = self
for i in stack:
tmp = tmp.timers[i]
return tmp

def get_total_time(self, identifier):
"""
Parameters
----------
identifier: str
The full name of the timer including parent timers separated with dots.

Returns
-------
total_time: float
The total time spent with the specified timer active.
"""
stack = identifier.split('.')
timer = self._get_timer_from_stack(stack)
return timer.total_time

def get_num_calls(self, identifier):
"""
Parameters
----------
identifier: str
The full name of the timer including parent timers separated with dots.

Returns
-------
num_calss: int
The number of times start_increment was called for the specified timer.
"""
stack = identifier.split('.')
timer = self._get_timer_from_stack(stack)
return timer.n_calls

def get_relative_percent_time(self, identifier):
"""
Parameters
----------
identifier: str
The full name of the timer including parent timers separated with dots.

Returns
-------
percent_time: float
The percent of time spent in the specified timer
relative to the timer's immediate parent.
"""
stack = identifier.split('.')
timer = self._get_timer_from_stack(stack)
parent = self._get_timer_from_stack(stack[:-1])
return timer.total_time / parent.total_time * 100
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For timing very short times on machines with low precision, total_time can be 0

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I fixed this in the printing methods. However, I think a division by 0 error is appropriate here. Other methods are available for users to easily get the numerator and denominator individually. Although, given what I just said, it might make sense to remove the percent methods. Thoughts?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personally, I think that either get_relative_percent should return 0 when total_time is 0 so that clients that are doing the equivalent of '%d' % (t.get_relative_percent) will not get exceptions. It is numerically incorrect, but doesn't meaningfully change the interpretation of the results.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another edge case: if you call get_relative_percent from the top of the stack, you should get 100.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. That should now be fixed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I disagree - I think returning 0 from get_relative_percent_time when parent.total_time is 0 does change the interpretation of the results. I would rather the user get an exception. However, I am happy to remove this method completely and let users do what they want manually (with calls to get_total_time).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, we could return math.nan, which is accurate and does not raise an exception. Thoughts?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@michaelbynum Great idea! Just make it float('nan') (for compatibility).


def get_total_percent_time(self, identifier):
"""
Parameters
----------
identifier: str
The full name of the timer including parent timers separated with dots.

Returns
-------
percent_time: float
The percent of time spent in the specified timer
relative to the total time in all timers.
"""
stack = identifier.split('.')
timer = self._get_timer_from_stack(stack)
total_time = 0
for _timer in self.timers.values():
total_time += _timer.total_time
return timer.total_time / total_time * 100