Merge pull request #26 from edgarsi/adhere-to-time-thresh

Adhere to 'time_thresh' dynamically
Erotemic · Nov 1, 2022 · 19bcfc7 · 19bcfc7
2 parents 2b219b7 + 2fc74ed
commit 19bcfc7
Show file tree

Hide file tree

Showing 6 changed files with 221 additions and 137 deletions.
diff --git a/.gitignore b/.gitignore
@@ -102,3 +102,6 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+
+# IDE project settings
+.idea/*
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,11 @@ We are currently working on porting this changelog to the specifications in
 [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Version: 1.1.0] - Unreleased 
+
+### Fixed
+* `time_thresh` is now respected when calculating dynamic display updates
+
 
 ## [Version: 1.0.1] - Unreleased 
 

diff --git a/progiter/__init__.py b/progiter/__init__.py
@@ -54,7 +54,7 @@
 """
 from .progiter import (ProgIter,)
 
-__version__ = '1.0.1'
+__version__ = '1.1.0'
 __all__ = [
     'ProgIter',
 ]
diff --git a/progiter/progiter.py b/progiter/progiter.py
@@ -250,7 +250,7 @@ class ProgIter(_TQDMCompat, _BackwardsCompat):
 
     ProgIter is an alternative to `tqdm`. ProgIter implements much of the
     tqdm-API.  The main difference between `ProgIter` and `tqdm` is that
-    ProgIter does not use threading where as `tqdm` does.
+    ProgIter does not use threading whereas `tqdm` does.
 
     Attributes:
         iterable (List | Iterable):
@@ -267,10 +267,6 @@ class ProgIter(_TQDMCompat, _BackwardsCompat):
             How many iterations to wait between messages.
             Defaults to 1.
 
-        adjust (bool):
-            if True freq is adjusted based on time_thresh
-            Defaults to True.
-
         eta_window (int):
             number of previous measurements to use in eta calculation, default=64
 
@@ -325,7 +321,7 @@ class ProgIter(_TQDMCompat, _BackwardsCompat):
 
     Note:
         ProgIter is an alternative to `tqdm`.  The main difference between
-        `ProgIter` and `tqdm` is that ProgIter does not use threading where as
+        `ProgIter` and `tqdm` is that ProgIter does not use threading whereas
         `tqdm` does.  `ProgIter` is simpler than `tqdm` and thus more stable in
         certain circumstances.
 
@@ -470,15 +466,31 @@ def _iterate(self):
         if not self.started:
             self.begin()
         # Wrap input sequence in a generator
-        for self._iter_idx, item in enumerate(self.iterable, start=self.initial + 1):
-            yield item
-            # Call the body of step to reduce overyead
-            # self.step(0)  # inc is 0 because we already updated
-            if (self._iter_idx) % self.freq == 0:
-                # update progress information every so often
-                self._update_measurements()
-                self._update_estimates()
-                self.display_message()
+        gen = enumerate(self.iterable, start=self.initial + 1)
+        # Iterating is performance sensitive, so the two cases get separate
+        # loops: with 'adjust' off a cheap modulo check on 'freq' suffices; with
+        # 'adjust' on, a step may also read the clock. Worth the duplication.
+        if self.adjust:
+            for self._iter_idx, item in gen:
+                yield item
+
+                between_idx = (self._iter_idx - self._now_idx)
+                need_display = between_idx >= self.freq
+
+                # No clue how much time has passed, the frequency may be way off.
+                # If 'freq' is too large, checking time is necessary to notice it.
+                if not need_display:
+                    between_time = default_timer() - self._now_time
+                    need_display = between_time >= self.time_thresh
+
+                if need_display:
+                    # update progress information every so often
+                    self._update_and_display_message()
+        else:
+            for self._iter_idx, item in gen:
+                yield item
+                if self._iter_idx % self.freq == 0:  # very low overhead
+                    self._update_and_display_message()
         self.end()
 
     def step(self, inc=1, force=False):
@@ -507,12 +519,18 @@ def step(self, inc=1, force=False):
         """
         if not self.enabled:
             return
+
         self._iter_idx += inc
-        _between_idx = (self._iter_idx - self._now_idx)
-        if force or _between_idx >= self.freq:
-            self._update_measurements()
-            self._update_estimates()
-            self.display_message()
+
+        between_idx = (self._iter_idx - self._now_idx)
+        need_display = force or between_idx >= self.freq
+
+        if self.adjust and not need_display:
+            between_time = default_timer() - self._now_time
+            need_display = between_time >= self.time_thresh
+
+        if need_display:
+            self._update_and_display_message()
 
     def _reset_internals(self):
         """
@@ -527,13 +545,11 @@ def _reset_internals(self):
         self._iter_idx = self.initial
         self._last_idx = self.initial - 1
         # now time is actually not right now
-        # now refers the the most recent measurement
+        # now refers to the most recent measurement
         # last refers to the measurement before that
         self._now_idx = self.initial
         self._now_time = 0
         self._between_count = -1
-        self._max_between_time = -1.0
-        self._max_between_count = -1.0
         self._iters_per_second = 0.0
         self._update_message_template()
 
@@ -548,7 +564,7 @@ def begin(self):
         Initializes information used to measure progress
 
         This only needs to be used if this ProgIter is not wrapping an iterable.
-        Does nothing if the this ProgIter is disabled.
+        Does nothing if this ProgIter is disabled.
 
         Returns:
             ProgIter:
@@ -586,15 +602,14 @@ def end(self):
         Signals that iteration has ended and displays the final message.
 
         This only needs to be used if this ProgIter is not wrapping an
-        iterable.  Does nothing if the this ProgIter object is disabled or has
+        iterable.  Does nothing if this ProgIter object is disabled or has
         already finished.
         """
         if not self.enabled or self.finished:
             return
         # Write the final progress line if it was not written in the loop
         if self._iter_idx != self._now_idx:
-            self._update_measurements()
-            self._update_estimates()
+            self._update_all_calculations()
             self._est_seconds_left = 0
             self.display_message()
         self.ensure_newline()
@@ -605,17 +620,12 @@ def _adjust_frequency(self):
         # Adjust frequency so the next print will not happen until
         # approximately `time_thresh` seconds have passed as estimated by
         # iter_idx.
-        eps = 1E-9
-        self._max_between_time = max(self._max_between_time,
-                                     self._between_time)
-        self._max_between_time = max(self._max_between_time, eps)
-        self._max_between_count = max(self._max_between_count,
-                                      self._between_count)
 
         # If progress was uniform and all time estimates were
         # perfect this would be the new freq to achieve self.time_thresh
-        new_freq = int(self.time_thresh * self._max_between_count /
-                       self._max_between_time)
+        eps = 1E-9
+        new_freq = int(self.time_thresh * self._between_count /
+                       max(eps, self._between_time))
         # But things are not perfect. So, don't make drastic changes
         rel_limit = self.rel_adjust_limit
         max_freq = int(self.freq * rel_limit)
@@ -627,7 +637,7 @@ def _update_measurements(self):
         update current measurements and estimated of time and progress
         """
         self._last_idx = self._now_idx
-        self._last_time  = self._now_time
+        self._last_time = self._now_time
 
         self._now_idx = self._iter_idx
         self._now_time = default_timer()
@@ -636,7 +646,10 @@ def _update_measurements(self):
         self._between_count = self._now_idx - self._last_idx
         self._total_seconds = self._now_time - self._start_time
 
-        # Record that measures were updated
+        # Adjust frequency to stay within time_thresh
+        if self.adjust and (self._between_time < self.time_thresh or
+                            self._between_time > self.time_thresh * 2.0):
+            self._adjust_frequency()
 
     def _update_estimates(self):
         # Estimate rate of progress
@@ -653,13 +666,15 @@ def _update_estimates(self):
             # Estimate time remaining if total is given
             iters_left = self.total - self._now_idx
             est_eta = iters_left / self._iters_per_second
-            self._est_seconds_left  = est_eta
+            self._est_seconds_left = est_eta
 
-        # Adjust frequency if printing too quickly
-        # so progress does not slow down actual function
-        if self.adjust and (self._between_time < self.time_thresh or
-                            self._between_time > self.time_thresh * 2.0):
-            self._adjust_frequency()
+    def _update_all_calculations(self):
+        self._update_measurements()  # first: records _now_idx / _now_time
+        self._update_estimates()  # then: ETA is derived from _now_idx
+
+    def _update_and_display_message(self):
+        self._update_all_calculations()  # refresh measurements and estimates
+        self.display_message()  # then show the updated message
 
     def _update_message_template(self):
         self._msg_fmtstr = self._build_message_template()

diff --git a/tests/benchmark.py b/tests/benchmark.py
@@ -0,0 +1,94 @@
+import ubelt as ub
+from progiter import ProgIter
+
+
+def time_progiter_overhead():
+    """Time ProgIter's per-iteration overhead against bare generator baselines."""
+    import timeit
+    import textwrap
+    setup = textwrap.dedent(
+        '''
+        from progiter import ProgIter
+        import numpy as np
+        import time
+        from six.moves import StringIO
+        N = 500
+        file = StringIO()
+        rng = np.random.RandomState(42)
+        ndims = 2
+        vec1 = rng.rand(113, ndims)
+        vec2 = rng.rand(71, ndims)
+
+        def minimal_wrapper1(sequence):
+            for item in sequence:
+                yield item
+
+        def minimal_wrapper2(sequence):
+            for count, item in enumerate(sequence, start=1):
+                yield item
+
+        def minimal_wrapper3(sequence):
+            count = 0
+            for item in sequence:
+                yield item
+                count += 1
+
+        def minwrap4(sequence):
+            for count, item in enumerate(sequence, start=1):
+                yield item
+                if count % 100:
+                    pass
+
+        def minwrap5(sequence):
+            for count, item in enumerate(sequence, start=1):
+                yield item
+                if time.time() < 100:
+                    pass
+
+        def step_through(prog):
+            prog.begin()
+            for item in range(prog.total):
+                prog.step()
+                yield item
+            prog.end()
+        '''
+    )
+    statements = {
+        'baseline'         : '[{work} for n in range(N)]',
+        'creation'         : 'ProgIter(range(N))',
+        'minwrap1'         : '[{work} for n in minimal_wrapper1(range(N))]',
+        'minwrap2'         : '[{work} for n in minimal_wrapper2(range(N))]',
+        'minwrap3'         : '[{work} for n in minimal_wrapper3(range(N))]',
+        'minwrap4'         : '[{work} for n in minwrap4(range(N))]',
+        'minwrap5'         : '[{work} for n in minwrap5(range(N))]',
+        '(sk-disabled)'    : '[{work} for n in ProgIter(range(N), enabled=False, file=file)]',  # NOQA
+        '(sk-plain)'       : '[{work} for n in ProgIter(range(N), file=file)]',  # NOQA
+        '(sk-freq)'        : '[{work} for n in ProgIter(range(N), file=file, freq=100)]',  # NOQA
+        '(sk-no-adjust)'   : '[{work} for n in ProgIter(range(N), file=file, adjust=False, freq=200)]',  # NOQA
+        '(sk-high-freq)'   : '[{work} for n in ProgIter(range(N), file=file, freq=200)]',  # NOQA
+
+        '(step-plain)'     : '[{work} for n in step_through(ProgIter(total=N, file=file))]',  # NOQA
+        '(step-freq)'      : '[{work} for n in step_through(ProgIter(total=N, file=file, freq=100))]',  # NOQA
+        '(step-no-adjust)' : '[{work} for n in step_through(ProgIter(total=N, file=file, adjust=False, freq=200))]',  # NOQA
+    }
+    timings = {}  # benchmark label -> mean seconds per run of the statement
+
+    work_strs = [
+        'None',
+        'vec1.dot(vec2.T)',
+        'n % 10 == 0',
+    ]
+    work = work_strs[0]  # expression substituted for {work} in each statement
+
+    number = 10000  # executions of each statement per timeit call
+    prog = ProgIter(desc='timing', adjust=True)
+    for key, stmt in prog(statements.items()):
+        prog.set_extra(key)
+        secs = timeit.timeit(stmt.format(work=work), setup, number=number)
+        timings[key] = secs / number  # timeit returns the total over all runs
+
+    print(ub.repr2(timings, precision=8, align=':'))
+
+
+if __name__ == '__main__':
+    time_progiter_overhead()