In [1]:
import numpy as np
import os

# Root of the dataset tree; the loops below read it as
# <path>/<building>/<dataset>/_fusion/indices.npz.
path = "/media/tianshu/data/radarhd2/"
# ds: building name -> {dataset name -> length of the first axis of the
# "indices" array}.  Presumably this is the number of fused frames in the
# trace -- TODO confirm against whatever writes indices.npz.
ds = {}
# ds_flat: the same counts keyed by dataset name only, merged across all
# buildings (a dataset name repeated in two buildings would be overwritten).
ds_flat = {}
for building in sorted(os.listdir(path)):
    bs = {}
    for dataset in sorted(os.listdir(os.path.join(path, building))):
        indices_path = os.path.join(
            path, building, dataset, "_fusion", "indices.npz")
        # np.load on a .npz archive returns an NpzFile that keeps the file
        # open until closed; use it as a context manager so a descriptor is
        # not leaked for every trace.
        with np.load(indices_path) as indices:
            bs[dataset] = indices['indices'].shape[0]
    ds[building] = bs
    ds_flat.update(bs)

In [2]:
import textwrap

# A trace goes to the test split when its "<building>/<trace>" path starts
# with any of these prefixes; every other trace goes to the train split.
test_prefixes = {
    "morrison", "cfa"
}


def _yaml_flow_list(key, items, width=77, indent='  '):
    """Format `items` as a wrapped YAML flow sequence.

    Args:
        key: name printed before the opening bracket.
        items: strings to emit, each wrapped in double quotes.
        width: maximum line width passed to `textwrap.wrap`.
        indent: prefix for every wrapped line.

    Returns:
        A list of lines: ``"<key>: ["``, the wrapped comma-separated items
        (with a trailing comma), and ``"]"``.  An empty `items` produces
        just the opening and closing lines.
    """
    # With no items, skip the trailing comma: a lone "," inside the brackets
    # is not a valid flow-sequence entry.
    joined = ", ".join(["\"{}\"".format(item) for item in items])
    text = joined + "," if joined else ""
    wrapped = textwrap.wrap(
        text, width=width,
        initial_indent=indent, subsequent_indent=indent,
        # Only wrap at the spaces between entries.  The defaults would also
        # split after a hyphen or inside an over-long word, which would
        # break a quoted trace name across two lines.
        break_long_words=False, break_on_hyphens=False)
    return ["{}: [".format(key)] + wrapped + ["]"]


train = []
ntrain = 0
test = []
ntest = 0

# str.startswith accepts a tuple of candidate prefixes.
_prefixes = tuple(test_prefixes)
for building, traces in ds.items():
    for t, n in traces.items():
        t = os.path.join(building, t)
        if t.startswith(_prefixes):
            test.append(t)
            ntest += n
        else:
            train.append(t)
            ntrain += n

train_entry = _yaml_flow_list("traces", train)
test_entry = _yaml_flow_list("test", test)

# Everything printed here is meant to be pasted into _traces.yaml; the
# statistics are emitted as YAML comments.
print("# stats:")
print("#   train:", ntrain)
print("#   test:", ntest)
print("# _traces.yaml:")
print("\n".join(train_entry))
print("\n".join(test_entry))

# stats:
#   train: 256879
#   test: 21561
# _traces.yaml:
traces: [
  "baker/baker.1.fwd", "baker/baker.1.lat", "baker/baker.2.fwd",
  "baker/baker.2.lat", "cic/cic.1.fwd", "cic/cic.1.lat", "cic/cic.2.fwd",
  "cic/cic.2.lat", "cic/cic.4.fwd", "cic/cic.4.lat", "doherty/doherty.1.fwd",
  "doherty/doherty.1.lat", "doherty/doherty.a.fwd", "doherty/doherty.a.lat",
  "doherty/doherty.b.fwd", "doherty/doherty.b.lat", "gates/gates.3.fwd",
  "gates/gates.3.lat", "gates/gates.4.fwd", "gates/gates.4.lat",
  "gates/gates.5.fwd", "gates/gates.5.lat", "gates/gates.6.fwd",
  "gates/gates.6.lat", "gates/gates.7.fwd", "gates/gates.7.lat",
  "hamburg/hamburg.1.fwd", "hamburg/hamburg.1.lat", "hamburg/hamburg.2.fwd",
  "hamburg/hamburg.2.lat", "hamburg/hamburg.3.fwd", "hamburg/hamburg.3.lat",
  "hamburg/hamburg.a.fwd", "hamburg/hamburg.a.lat", "nsh/nsh.1.fwd",
  "nsh/nsh.1.lat", "nsh/nsh.2.fwd", "nsh/nsh.2.lat", "nsh/nsh.3.fwd",
  "nsh/nsh.3.lat", "nsh/nsh.4.fwd", "nsh/nsh.4.lat", "porter/porter.1.fwd",


In [3]:
from ipywidgets import widgets

# Lay the per-building summaries out in four text columns, always appending
# the next building to whichever column currently has the fewest lines
# (the leftmost one on ties).
col_text = [[] for _ in range(4)]
for building, traces in ds.items():
    shortest = min(col_text, key=len)
    # Header line: building name and the sum of its per-trace counts.
    shortest.append("{: <14} = {}".format(building, sum(traces.values())))
    # One line per trace, then a blank separator line.
    shortest.extend(
        "| {:14} {:5}   ".format(k, v) for k, v in traces.items())
    shortest.append("")


def _as_output(lines):
    """Return an Output widget with `lines` printed into it."""
    out = widgets.Output()
    with out:
        print("\n".join(lines))
    return out


col_widgets = [_as_output(lines) for lines in col_text]
hbox = widgets.HBox(col_widgets)

header = widgets.Output()
with header:
    total = sum(ds_flat.values())
    # The /10 suggests 10 entries per second, giving hours after /60/60
    # -- TODO confirm the sample rate.
    print("total: {} ({:.2f}h)".format(total, total / 10 / 60 / 60))

# Grand total on top, the four columns side by side underneath.
window = widgets.VBox([header, hbox])
display(window)

VBox(children=(Output(), HBox(children=(Output(), Output(), Output(), Output()))))