Skip to content

Commit

Permalink
compile_watch, standalones
Browse files Browse the repository at this point in the history
  • Loading branch information
jpn-- committed Mar 25, 2022
1 parent 1216033 commit a35bf68
Show file tree
Hide file tree
Showing 12 changed files with 403 additions and 11 deletions.
6 changes: 5 additions & 1 deletion activitysim/core/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from .simulate_consts import SPEC_EXPRESSION_NAME, SPEC_LABEL_NAME
from . import inject, config
from .. import __version__
from ..core import tracing

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -242,7 +243,7 @@ def skim_dataset():
if zarr_file:
logger.info(f"did not find zarr skims, loading omx")
d = sh.dataset.from_omx_3d(
[openmatrix.open_file(f) for f in omx_file_paths],
[openmatrix.open_file(f, mode='r') for f in omx_file_paths],
time_periods=time_periods,
max_float_precision=max_float_precision,
)
Expand Down Expand Up @@ -724,6 +725,7 @@ def apply_flow(spec, choosers, locals_d=None, trace_label=None, required=False,
flow_result = flow.dot(
coefficients=spec.values.astype(np.float32),
dtype=np.float32,
compile_watch=True,
)
# TODO: are there remaining internal arrays in dot that need to be
# passed out to be seen by the dynamic chunker before they are freed?
Expand All @@ -739,4 +741,6 @@ def apply_flow(spec, choosers, locals_d=None, trace_label=None, required=False,
# index_keys = self.shared_data.meta_match_names_idx.keys()
# logger.debug(f"Flow._get_indexes: {index_keys}")
raise
if flow.compiled_recently:
tracing.timing_notes.add(f"compiled:{flow.name}")
return flow_result, flow
4 changes: 2 additions & 2 deletions activitysim/core/skim_dict_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def load_skim_info(self, skim_tag):

logger.debug(f"load_skim_info {skim_tag} reading {omx_file_path}")

with omx.open_file(omx_file_path) as omx_file:
with omx.open_file(omx_file_path, mode='r') as omx_file:

# fixme call to omx_file.shape() failing in windows p3.5
if self.omx_shape is None:
Expand Down Expand Up @@ -273,7 +273,7 @@ def _read_skims_from_omx(self, skim_info, skim_data):
logger.info(f"_read_skims_from_omx {omx_file_path}")

# read skims into skim_data
with omx.open_file(omx_file_path) as omx_file:
with omx.open_file(omx_file_path, mode='r') as omx_file:
for skim_key, omx_key in omx_keys.items():

if omx_manifest[omx_key] == omx_file_path:
Expand Down
8 changes: 6 additions & 2 deletions activitysim/core/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@


logger = logging.getLogger(__name__)
timing_notes = set()


class ElapsedTimeFormatter(logging.Formatter):
Expand Down Expand Up @@ -66,6 +67,7 @@ def print_elapsed_time(msg=None, t0=None, debug=False):


def log_runtime(model_name, start_time=None, timing=None):
global timing_notes

assert (start_time or timing) and not (start_time and timing)

Expand All @@ -85,10 +87,12 @@ def log_runtime(model_name, start_time=None, timing=None):
if not inject.get_injectable('locutor', False):
return

header = "process_name,model_name,seconds,minutes"
header = "process_name,model_name,seconds,minutes,notes"
note = " ".join(timing_notes)
with config.open_log_file('timing_log.csv', 'a', header) as log_file:
print(f"{process_name},{model_name},{seconds},{minutes}", file=log_file)
print(f"{process_name},{model_name},{seconds},{minutes},{note}", file=log_file)

timing_notes.clear()

def delete_output_files(file_type, ignore=None, subdir=None):
"""
Expand Down
Empty file.
161 changes: 161 additions & 0 deletions activitysim/standalone/compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import altair as alt
import pandas as pd
import os
from .pipeline import load_checkpointed_tables

def load_pipelines(pipelines, tables=None, checkpoint_name=None):
    """
    Load checkpointed tables from one or more ActivitySim pipeline files.

    Parameters
    ----------
    pipelines : Dict[str, Path-like]
        Mapping run name to path of pipeline file.
    tables : Container[str], optional
        Names of tables to load from each pipeline; passed through to
        `load_checkpointed_tables` (which loads all checkpointed tables
        when None).
    checkpoint_name : str, optional
        Name of checkpoint to load for all pipelines; `load_checkpointed_tables`
        falls back to the final checkpoint when None.

    Returns
    -------
    Dict[str, Tuple]
        Mapping run name to the result of `load_checkpointed_tables`
        for that run's pipeline file.
    """
    return {
        key: load_checkpointed_tables(
            pth,
            tables=tables,
            checkpoint_name=checkpoint_name,
        )
        for key, pth in pipelines.items()
    }


def load_final_tables(output_dirs, tables=None, index_cols=None):
    """
    Load final output CSV tables from one or more model run directories.

    Parameters
    ----------
    output_dirs : Dict[str, Path-like]
        Mapping run name to that run's output directory.
    tables : Dict[str, str]
        Mapping table name to the CSV filename within each output
        directory.  Effectively required; a clear error is raised
        when omitted.
    index_cols : Dict[str, str], optional
        Mapping table name to the column to use as that table's index.
        Tables not listed here are read with a default RangeIndex.

    Returns
    -------
    Dict[str, Dict[str, pd.DataFrame]]
        result[run_name][table_name] -> loaded DataFrame

    Raises
    ------
    ValueError
        If `tables` is not provided.
    """
    if tables is None:
        # fail fast with a meaningful message instead of an
        # AttributeError from None.items() below
        raise ValueError("a tables mapping (table name -> csv filename) is required")
    result = {}
    for key, pth in output_dirs.items():
        result[key] = {}
        for tname, tfile in tables.items():
            tpath = os.path.join(pth, tfile)
            kwargs = {}
            # only set index_col for tables with a declared index column
            if index_cols is not None and tname in index_cols:
                kwargs['index_col'] = index_cols[tname]
            result[key][tname] = pd.read_csv(tpath, **kwargs)
    return result


def compare_trip_mode_choice(tablesets, title="Trip Mode Choice", grouping='primary_purpose'):
    """
    Build a stacked bar chart comparing trip mode shares across runs.

    Parameters
    ----------
    tablesets : Dict[str, Dict[str, pd.DataFrame]]
        Mapping run name to tables; each tableset must include a 'trips'
        table containing a 'trip_mode' column and the `grouping` column.
    title : str, optional
        Figure title; falsy values suppress the title.
    grouping : str
        Trips-table column used to facet the chart rows.

    Returns
    -------
    alt.Chart
    """
    d = {}
    groupings = [grouping, ]

    for key, tableset in tablesets.items():
        # trip counts and within-group mode shares for this run
        df = tableset['trips'].groupby(
            groupings + ['trip_mode']
        ).size().rename('n_trips').reset_index()
        df['share_trips'] = df['n_trips'] / df.groupby(groupings)['n_trips'].transform('sum')
        d[key] = df

    all_d = pd.concat(d, names=['source']).reset_index()

    # clicking a legend entry highlights that mode in the chart
    selection = alt.selection_point(
        fields=['trip_mode'], bind='legend',
    )

    fig = alt.Chart(
        all_d
    ).mark_bar(
    ).encode(
        color='trip_mode',
        y=alt.Y('source', axis=alt.Axis(grid=False, title='')),
        x=alt.X('share_trips', axis=alt.Axis(grid=False, labels=False, title='Mode Share')),
        # facet rows by the requested grouping column; this was hard-coded
        # to 'primary_purpose', silently ignoring the `grouping` argument
        row=grouping,
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
        tooltip=['trip_mode', 'source', 'n_trips', alt.Tooltip('share_trips:Q', format='.2%')],
    ).add_selection(
        selection,
    )

    if title:
        fig = fig.properties(
            title=title
        ).configure_title(
            fontSize=20,
            anchor='start',
            color='black',
        )

    return fig


def compare_trip_distance(
        tablesets,
        skims,
        dist_skim_name,
        otaz_col='origin',
        dtaz_col='destination',
        time_col='depart',
        dist_bins=20,
        grouping='primary_purpose',
        title="Trip Length Distribution",
        max_dist=None,
):
    """
    Build a faceted line chart comparing trip length distributions across runs.

    Parameters
    ----------
    tablesets : Dict[str, Dict[str, pd.DataFrame]]
        Mapping run name to tables; each must include a 'trips' table.
    skims : Dataset or Dict[str, Dataset]
        Skim dataset(s) providing the distance variable; a single
        (non-dict) object is shared by every tableset.
    dist_skim_name : str
        Name of the distance variable within the skims.
    otaz_col, dtaz_col, time_col : str
        Trips-table columns holding origin zone, destination zone, and
        departure time.
    dist_bins : int, optional
        Number of bins for pd.cut; None plots raw distances.
    grouping : str
        Trips-table column used to facet the figure.
    title : str, optional
        Figure title; falsy values suppress the title.
    max_dist : numeric, optional
        Drop trips with distance greater than this before binning.
    """
    groupings = [grouping]
    # allow a single shared skims object to be used for all runs
    if not isinstance(skims, dict):
        skims = {i: skims for i in tablesets.keys()}

    distances = {}
    for key, tableset in tablesets.items():
        skim_dist = skims[key][[dist_skim_name]]
        looks = [
            tableset['trips'][otaz_col].rename('otaz'),
            tableset['trips'][dtaz_col].rename('dtaz'),
        ]
        if 'time_period' in skim_dist.dims:
            # map departure value to a skim time-period label
            # (time_period_imap presumably set by the skim loader — TODO confirm)
            looks.append(
                tableset['trips'][time_col].apply(skims[key].attrs['time_period_imap'].get).rename('time_period'),
            )
        look = pd.concat(looks, axis=1)
        # NOTE(review): .iat.df appears to be a sharrow accessor that looks up
        # the distance for each trip's OD (and time period) row — confirm
        distances[key] = skims[key][[dist_skim_name]].iat.df(look)

    if dist_bins is not None:
        # bin all runs' distances together so bin edges are shared
        result = pd.concat(distances, names=['source'])
        if max_dist is not None:
            result = result[result <= max_dist]
        result = pd.cut(result.iloc[:, 0], dist_bins).to_frame()
        # split the binned distances back out per run
        distances = {k: result.loc[k] for k in tablesets.keys()}

    data = {}
    for key, tableset in tablesets.items():
        data[key] = tableset['trips'].assign(**{'distance': distances[key]})

    d = {}
    for key, dat in data.items():
        # count trips per (grouping, distance bin); unstack/fillna/stack
        # materializes empty bins as zero counts
        df = dat.groupby(
            groupings + ['distance']
        ).size().rename('n_trips').unstack('distance').fillna(0).stack().rename('n_trips').reset_index()
        df['share_trips'] = df['n_trips'] / df.groupby(groupings)['n_trips'].transform('sum')
        d[key] = df

    all_d = pd.concat(d, names=['source']).reset_index()
    # plot each interval at its midpoint
    all_d['distance'] = all_d['distance'].apply(lambda x: x.mid)

    fig = alt.Chart(
        all_d
    ).mark_line(
        interpolate='monotone',
    ).encode(
        color='source',
        y=alt.Y('share_trips', axis=alt.Axis(grid=False, title='')),
        x=alt.X('distance', axis=alt.Axis(grid=False, title='Distance')),
        #opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
        #tooltip = ['trip_mode', 'source', 'n_trips', alt.Tooltip('share_trips:Q', format='.2%')],
        facet=alt.Facet(grouping, columns=3),
        strokeWidth = 'source',
    ).properties(
        width=200,
        height=120,
    )

    if title:
        fig = fig.properties(
            title=title
        ).configure_title(
            fontSize=20,
            anchor='start',
            color='black',
        )

    return fig

42 changes: 42 additions & 0 deletions activitysim/standalone/pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import pandas as pd

from ..core import pipeline

def load_checkpointed_tables(
        pipeline_file_path,
        tables=None,
        checkpoint_name=None,
):
    """
    Load tables as of a given checkpoint from an ActivitySim pipeline store.

    Parameters
    ----------
    pipeline_file_path : Path-like
        Path of the pipeline HDF5 file.
    tables : Container[str], optional
        Names of tables to load; all checkpointed tables when None.
    checkpoint_name : str, optional
        Checkpoint to load; defaults to the final checkpoint.

    Returns
    -------
    Tuple[str, Dict[str, pd.DataFrame]]
        The resolved checkpoint name and a mapping of table name to
        the DataFrame loaded for that checkpoint.
    """
    # open read-only and guarantee the store is closed even if a read raises
    # (the previous explicit open/close leaked the handle on error)
    with pd.HDFStore(pipeline_file_path, mode='r') as pipeline_store:

        checkpoints = pipeline_store[pipeline.CHECKPOINT_TABLE_NAME]

        # checkpoint row as series
        if checkpoint_name is None:
            # default to the last (most recent) checkpoint
            checkpoint = checkpoints.iloc[-1]
            checkpoint_name = checkpoint.loc[pipeline.CHECKPOINT_NAME]
        else:
            i = checkpoints.set_index(pipeline.CHECKPOINT_NAME).index.get_loc(checkpoint_name)
            checkpoint = checkpoints.iloc[i]

        # series with table name as index and checkpoint_name as value
        checkpoint_tables = checkpoint[~checkpoint.index.isin(pipeline.NON_TABLE_COLUMNS)]

        # omit dropped tables (flagged by an empty checkpoint name)
        checkpoint_tables = checkpoint_tables[checkpoint_tables != '']

        # hdf5 key is <table_name>/<checkpoint_name>; each table carries its
        # own (possibly earlier) checkpoint name in the checkpoint row, so use
        # that per-table name rather than the requested checkpoint_name
        checkpoint_tables = {
            table_name: pipeline.pipeline_table_key(table_name, table_checkpoint_name)
            for table_name, table_checkpoint_name in checkpoint_tables.items()
        }

        data = {}
        for table_name, table_key in checkpoint_tables.items():
            if tables is None or table_name in tables:
                data[table_name] = pipeline_store[table_key]

    # checkpoint name and mapping of table name to loaded DataFrame
    return checkpoint_name, data
89 changes: 89 additions & 0 deletions activitysim/standalone/render.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import nbformat as nbf
from nbconvert import HTMLExporter
import nbclient

# from jupyter_contrib_nbextensions.nbconvert_support import TocExporter # problematic

def render_notebook(nb_filename, cellcontent):
    """
    Build, execute, and export a Jupyter notebook as standalone HTML.

    Parameters
    ----------
    nb_filename : str
        Path of the HTML file to write.  Despite the name, the output is
        the exported HTML document, not an .ipynb file.
    cellcontent : Iterable[str]
        Source text for each cell, in order.  A cell beginning with the
        literal prefix "[md]" becomes a markdown cell (with the prefix
        removed); anything else becomes a code cell.
    """
    nb = nbf.v4.new_notebook()

    cells = []
    for c in cellcontent:
        if c.startswith("[md]"):
            cells.append(nbf.v4.new_markdown_cell(c[4:]))
        else:
            cells.append(nbf.v4.new_code_cell(c))
    nb['cells'] = cells

    # run the notebook top-to-bottom so cell outputs are embedded in the export
    nb = nbclient.execute(nb)

    html_exporter = HTMLExporter(
        embed_images=True,
        exclude_input_prompt=True,
        exclude_output_prompt=True,
        exclude_input=True,
        # template_name = 'classic'
    )
    (body, resources) = html_exporter.from_notebook_node(nb)

    # write explicitly as utf-8: exported HTML routinely contains non-ascii,
    # which would fail under a non-utf-8 locale default encoding
    with open(nb_filename, 'w', encoding='utf-8') as f:
        f.write(body)


def render_comparison(html_filename, title, dist_skim="DIST"):
    """
    Render an HTML report comparing sharrow and legacy model run outputs.

    Parameters
    ----------
    html_filename : str
        Path of the HTML file to write.
    title : str
        Heading displayed at the top of the report.
    dist_skim : str, optional
        Distance skim name used for the trip-length section; pass a
        falsy value to omit that section entirely.
    """
    # assemble notebook cells incrementally; "[md]" marks markdown cells
    cells = [f"[md]# {title}"]

    cells.append(
        """\
from activitysim.standalone.compare import load_final_tables, compare_trip_mode_choice, compare_trip_distance
from activitysim.standalone.skims import load_skims
"""
    )

    cells.append(
        """\
data = load_final_tables(
    {"sharrow": "output-sharrow", "legacy": "output-legacy"},
    {"trips": "final_trips.csv"},
    {"trips": "trip_id"},
)
"""
    )

    cells.append("[md]## Trip Mode Choice")
    cells.append(
        """\
compare_trip_mode_choice(data)
"""
    )

    if dist_skim:
        # trip-length section only when a distance skim is named
        cells.append("[md]## Trip Distance")
        cells.append(
            f"""\
skims = load_skims("../configs/network_los.yaml", "../data")
compare_trip_distance(
    data,
    skims,
    "{dist_skim}",
    max_dist=10,
    title="Trip Length Distribution <10 miles",
)
"""
        )
        cells.append(
            f"""\
compare_trip_distance(
    data,
    skims,
    "{dist_skim}",
    title="Trip Length Distribution Overall",
)
"""
        )

    render_notebook(html_filename, cells)

0 comments on commit a35bf68

Please sign in to comment.