Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Patch progress bar #276

Merged
merged 16 commits into from
Jul 21, 2020
Merged
80 changes: 68 additions & 12 deletions strax/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@
import typing as ty
import warnings

import contextlib
import sys
if any('jupyter' in arg for arg in sys.argv):
# In some cases we are not using any notebooks,
# Taken from 44952863 on stack overflow thanks!
from tqdm import tqdm_notebook as tqdm
else:
from tqdm import tqdm

import numexpr
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -751,6 +760,7 @@ def get_iter(self, run_id: str,
selection_str=None,
keep_columns=None,
_chunk_number=None,
progress_bar=True,
**kwargs) -> ty.Iterator[strax.Chunk]:
"""Compute target for run_id and iterate over results.

Expand Down Expand Up @@ -808,19 +818,64 @@ def get_iter(self, run_id: str,
max_messages=self.context_config['max_messages'],
timeout=self.context_config['timeout']).iter()

if progress_bar:
# Defining time ranges for the progress bar:
if time_range:
# user specified a time selection
start_time, end_time = time_range
else:
# If no selection is specified we have to get the last end_time:
start_time = 0
end_time = float('inf')
for t in strax.to_str_tuple(targets):
try:
# Sometimes some metadata might be missing e.g. during tests.
chunks = self.get_meta(run_id, t)['chunks']
start_time = max(start_time, chunks[0]['start'])
end_time = min(end_time, chunks[-1]['end'])
except (strax.DataNotAvailable, KeyError):
WenzDaniel marked this conversation as resolved.
Show resolved Hide resolved
progress_bar = False

# Define nice progressbar format:
bar_format = "{desc}: |{bar}| {percentage:.2f} % [{elapsed}<{remaining}],"\
" {postfix[0]} {postfix[1][spc]:.2f} s/chunk,"\
" #chunks processed: {postfix[1][n]}"
sec_per_chunk = np.nan # Have not computed any chunk yet.
post_fix = ['Rate last Chunk:', {'spc': sec_per_chunk, 'n': 0}]

try:
for result in strax.continuity_check(generator):
seen_a_chunk = True
if not isinstance(result, strax.Chunk):
raise ValueError(f"Got type {type(result)} rather than "
f"a strax Chunk from the processor!")
result.data = self.apply_selection(
result.data,
selection_str=selection_str,
keep_columns=keep_columns,
time_range=time_range,
time_selection=time_selection)
yield result
with contextlib.ExitStack() as stack:
if progress_bar:
# Get initial time
pbar = stack.enter_context(tqdm(total=1, postfix=post_fix, bar_format=bar_format))
last_time = pbar.last_print_t

for n_chunks, result in enumerate(strax.continuity_check(generator), 1):
seen_a_chunk = True
if not isinstance(result, strax.Chunk):
raise ValueError(f"Got type {type(result)} rather than "
f"a strax Chunk from the processor!")
result.data = self.apply_selection(
result.data,
selection_str=selection_str,
keep_columns=keep_columns,
time_range=time_range,
time_selection=time_selection)

if progress_bar:
# Update progressbar:
pbar.n = (result.end - start_time) / (end_time - start_time)
pbar.update(0)
# Now get last time printed and refresh seconds_per_chunk:
# This is a small work around since we do not know the
# pacemaker here.
WenzDaniel marked this conversation as resolved.
Show resolved Hide resolved
sec_per_chunk = pbar.last_print_t - last_time
pbar.postfix[1]['spc'] = sec_per_chunk
pbar.postfix[1]['n'] = n_chunks
pbar.refresh()
last_time = pbar.last_print_t

yield result

except GeneratorExit:
generator.throw(OutsideException(
Expand Down Expand Up @@ -1170,6 +1225,7 @@ def add_method(cls, f):
- touching: select things that (partially) overlap with the range
- skip: Do not select a time range, even if other arguments say so
:param _chunk_number: For internal use: return data from one chunk.
:param progress_bar: Display a progress bar if metadata exists.
"""

get_docs = """
Expand Down