# Using Sparse MAZ Skims

In [None]:
import numpy as np
import pandas as pd
import xarray as xr
import sharrow as sh

This notebook walks through using sparse MAZ to MAZ skims with sharrow.
The example data we'll use to demonstrate this feature starts with regular
TAZ-based skims.

In [None]:
# Load the example TAZ-based skims and display them.
skims = sh.example_data.get_skims()
skims

We'll also load a MAZ-to-TAZ mapping file, which defines the MAZ's and 
which TAZ is used for each MAZ.

In [None]:
# Load the example MAZ-to-TAZ mapping table and display it.
maz_taz = sh.example_data.get_maz_to_taz()
maz_taz

Lastly, we'll load a sparse MAZ-to-MAZ skim table.  This table
defines origin and destination MAZ's, and the walk distance
between them.  The data is "sparse" in that only a limited number
of OMAZ-DMAZ pairs are included.  Unlike traditional sparse arrays,
the missing elements are not assumed to be zero, but instead we 
implicitly use the walk distance from the matching TAZ's in the
TAZ-based skims for those zone pairs.

In [None]:
# Load the example sparse MAZ-to-MAZ walk distance table and display it.
maz_to_maz_walk = sh.example_data.get_maz_to_maz_walk()
maz_to_maz_walk

To integrate these data sources, we will set a redirection on the skims.
This will add the MAZ dimensions to the skims, MAZ id's as additional 
coordinates, and will set attribute flags to tell sharrow which dimensions
have been redirected.

In [None]:
# Redirect the `otaz` dimension through the MAZ-to-TAZ table under the new
# name `omaz`, and apply the same mapping to `dtaz` under the name `dmaz`.
skims.redirection.set(
    maz_taz,
    map_to="otaz",
    name="omaz",
    map_also={"dtaz": "dmaz"},
)

In [None]:
# Display the skims again, now that the redirection has been set.
skims

Next, we can attach the sparse skims using `redirection.sparse_blender`.
This formats the sparse skim table into compressed sparse row format,
and attaches the resulting arrays to the Dataset.

In [None]:
# Attach the sparse MAZ-to-MAZ walk distances to the `DISTWALK` skim, using
# the MAZ-to-TAZ table's index as the MAZ index, then display the result.
skims.redirection.sparse_blender(
    "DISTWALK",
    maz_to_maz_walk.OMAZ,
    maz_to_maz_walk.DMAZ,
    maz_to_maz_walk.DISTWALK,
    max_blend_distance=1.0,
    index=maz_taz.index,
)
skims

Now the skims are ready to use!

For demonstration purposes, let's construct a trips dataframe with just a few
origin-destination pairs. Note that we're using the zone id's from the more 
detailed MAZ system.

In [None]:
# Each record is one (origin MAZ, destination MAZ) pair.
trips = pd.DataFrame(
    [
        (100, 100),
        (100, 101),
        (100, 103),
        (200, 201),
        (200, 202),
    ],
    columns=["orig_maz", "dest_maz"],
)
trips

We'll then put the trips together with the skims into a DataTree, as
usual for sharrow.

In [None]:
# Build a DataTree with the trips as the `base` node and the skims as a
# `skims` node; the relationships tie `base.orig_maz` to `skims.omaz` and
# `base.dest_maz` to `skims.dmaz`.
tree = sh.DataTree(
    base=trips,
    skims=skims,
    relationships=(
        "base.orig_maz @ skims.omaz",
        "base.dest_maz @ skims.dmaz",
    )
)

Now we can set up flows on this tree.

In [None]:
# Define a flow with a single output, `plain_distance`, given by the
# expression `DISTWALK`.  Bounds checking is switched on.
flow = tree.setup_flow({
    'plain_distance': 'DISTWALK',
}, boundscheck=True)

In [None]:
# Load the flow's output for the trips and display it.
flow.load()

Where the sparse (maz) data is missing or exceeds the max blending distance,
the dense (taz) data is returned.  Otherwise, the output is not strictly taken 
from the sparse or dense skims, but it is a blended mixture of the two. 

In [None]:
# TEST
from pytest import approx
# Hand-entered sparse inputs for the blend check; NaN stands for a missing
# sparse value.
sparse_dat = np.array([0.01, 0.2, np.nan, 3.2, np.nan])
# Hand-entered dense inputs for the same five positions.
dense_dat = np.array([0.12,0.12,0.12,0.17,0.17])
def blend(s, d, max_s):
    """
    Compute a reference blend of sparse and dense values.

    Parameters
    ----------
    s : array-like
        Sparse values; NaN marks a missing entry.
    d : array-like
        Dense values, broadcastable against `s`.
    max_s : float
        Maximum blending distance.

    Returns
    -------
    numpy.ndarray
        `d` wherever `s` is NaN or greater than `max_s`; elsewhere
        ``d * ratio + s * (1 - ratio)`` with ``ratio = s / max_s``.
    """
    # Accept plain sequences as well as arrays.
    s = np.asarray(s)
    d = np.asarray(d)
    ratio = s / max_s
    blended = d * ratio + s * (1 - ratio)
    # Fall back to the dense value when the sparse value is missing or is
    # beyond the blending distance.
    use_dense = np.isnan(s) | (s > max_s)
    return np.where(use_dense, d, blended)
# The flattened flow output should match the reference blend computed with a
# maximum blending distance of 1.0.
assert blend(sparse_dat, dense_dat, 1.0) == approx(flow.load().ravel())

We can apply all the transformations we like, as usual.

In [None]:
# Define three outputs from the same skim: the value itself, the value
# clipped with an upper bound of 0.15, and the value squared.
flow2 = tree.setup_flow({
    'plain_distance': 'DISTWALK',
    'clip_distance': 'DISTWALK.clip(upper=0.15)',
    'square_distance': 'DISTWALK**2',
})

In [None]:
# Load the flow's outputs as a DataFrame and display it.
flow2.load_dataframe()

In [None]:
# TEST
# Expected values, one row per trip.  The columns follow the order in which
# the flow2 expressions were defined: plain, clipped at 0.15, squared.
# NOTE: `approx` is imported from pytest in an earlier test cell.
assert flow2.load_dataframe().values == approx(np.array([
    [ 1.1100e-02,  1.1100e-02,  1.2321e-04],
    [ 1.8400e-01,  1.5000e-01,  3.3856e-02],
    [ 1.2000e-01,  1.2000e-01,  1.4400e-02],
    [ 1.7000e-01,  1.5000e-01,  2.8900e-02],
    [ 1.7000e-01,  1.5000e-01,  2.8900e-02]], dtype=np.float32)
)

## Using at and iat

The `at` and `iat` accessors work even when sparse matrix tables are
attached to a Dataset, with a few caveats.  First, only 2-dimensional
sparse tables are supported at this time.  Second, these accessors 
rely on the ability to reference the sparse data, which is lost if 
the dataset is naively filtered for variable names; filtering should
instead be done in the `_names` argument, which filters the 
output of the accessor instead of the input, without needing to build
the entire filtered dataset first.  For example:

In [None]:
# Look up the trips' origin/destination MAZ ids by label, restricting the
# output to the `DIST` and `DISTWALK` variables via `_names`.
skims.at(
    omaz=trips.orig_maz,
    dmaz=trips.dest_maz,
    _names=['DIST', 'DISTWALK'],
)

In [None]:
# TEST
from pytest import raises

# Label-based lookup of the trip OD pairs, with `_load=True`.
out = skims.at(
    omaz=trips.orig_maz,
    dmaz=trips.dest_maz,
    _names=["DIST", "DISTWALK"],
    _load=True,
)
expected_at = {
    "DIST": np.array([0.12, 0.12, 0.12, 0.17, 0.17], dtype=np.float32),
    "DISTWALK": np.array([0.0111, 0.184, 0.12, 0.17, 0.17], dtype=np.float32),
}
for var_name, expected_values in expected_at.items():
    np.testing.assert_array_almost_equal(
        out[var_name].to_numpy(),
        expected_values,
    )

# Adding a `time_period` indexer and the `SOV_TIME` variable to the same
# lookup is expected to raise NotImplementedError.
with raises(NotImplementedError):
    skims.at(
        omaz=trips.orig_maz,
        dmaz=trips.dest_maz,
        time_period=["AM", "AM", "AM", "AM", "AM"],
        _names=["DIST", "DISTWALK", "SOV_TIME"],
        _load=True,
    )

In [None]:
# Same kind of lookup, but by integer position rather than by label.
# NOTE(review): these positions appear to correspond to the MAZ ids used in
# `trips` (the test cells expect identical results) -- confirm.
skims.iat(
    omaz=[  0,   0,   0, 100, 100],
    dmaz=[  0,   1,   3, 101, 102],
    _names=['DIST', 'DISTWALK'],
)

In [None]:
# TEST
# Positional lookup, with `_load=True`.
out = skims.iat(
    omaz=[0, 0, 0, 100, 100],
    dmaz=[0, 1, 3, 101, 102],
    _names=["DIST", "DISTWALK"],
    _load=True,
)
expected_iat = {
    "DIST": np.array([0.12, 0.12, 0.12, 0.17, 0.17], dtype=np.float32),
    "DISTWALK": np.array([0.0111, 0.184, 0.12, 0.17, 0.17], dtype=np.float32),
}
for var_name, expected_values in expected_iat.items():
    np.testing.assert_array_almost_equal(
        out[var_name].to_numpy(),
        expected_values,
    )


To circumvent the redirection, and sparse lookup and blending,
simply point the accessor lookups to the dense dimensions:

In [None]:
# Index on the dense TAZ dimensions (`otaz`/`dtaz`) instead of the MAZ
# dimensions, requesting two variables via `_names`.
skims.at(
    otaz=[1,1,1,16,16],
    dtaz=[1,1,1,16,16],
    _names=['DIST', 'DISTWALK'], _load=True,
)

In [None]:
# Same dense lookup, but with the singular `_name` argument and one variable.
# NOTE(review): presumably `_name` returns a single array rather than a
# Dataset -- confirm against the sharrow documentation.
skims.at(
    otaz=[1,1,1,16,16],
    dtaz=[1,1,1,16,16],
    _name='DISTWALK',
)

In [None]:
# TEST
import sys

# Only run the shared-memory round trip on Python 3.8 or later.
if sys.version_info >= (3, 8):
    import secrets

    # A random suffix keeps the shared-memory key unique per run.
    token = "skims-with-sparse" + secrets.token_hex(5)

    readback0 = skims.shm.to_shared_memory(token)
    assert readback0.attrs == skims.attrs
    readback = sh.Dataset.shm.from_shared_memory(token)
    assert readback.attrs == skims.attrs

    expected_dist = np.array([0.12, 0.12, 0.12, 0.17, 0.17], dtype=np.float32)
    expected_distwalk = np.array([0.0111, 0.184, 0.12, 0.17, 0.17], dtype=np.float32)

    # Positional lookups on the dataset read back from shared memory.
    out = readback.iat(
        omaz=[0, 0, 0, 100, 100],
        dmaz=[0, 1, 3, 101, 102],
        _names=["DIST", "DISTWALK"],
        _load=True,
    )
    np.testing.assert_array_almost_equal(out["DIST"].to_numpy(), expected_dist)
    np.testing.assert_array_almost_equal(out["DISTWALK"].to_numpy(), expected_distwalk)

    # Label-based lookups on the dataset read back from shared memory.
    out = readback.at(
        omaz=trips.orig_maz,
        dmaz=trips.dest_maz,
        _names=["DIST", "DISTWALK"],
        _load=True,
    )
    np.testing.assert_array_almost_equal(out["DIST"].to_numpy(), expected_dist)
    np.testing.assert_array_almost_equal(out["DISTWALK"].to_numpy(), expected_distwalk)

    # The blender settings should be present on the read-back dataset too.
    assert readback.redirection.blenders == {
        "DISTWALK": {"max_blend_distance": 1.0, "blend_distance_name": None}
    }


In [None]:
# TEST
# The blender registered for DISTWALK should record the max blend distance
# passed to `sparse_blender` and no blend distance name.
assert skims.redirection.blenders == {'DISTWALK': {'max_blend_distance': 1.0, 'blend_distance_name': None}}

In [None]:
# TEST
# reverse skims in sparse
# Define a flow with the plain DISTWALK value alongside the value obtained
# through `skims.reverse("DISTWALK")`.
flow3 = tree.setup_flow({
    'plain_distance': 'DISTWALK',
    'reverse_distance': 'skims.reverse("DISTWALK")',
})

# Expected columns: plain_distance, reverse_distance.
# NOTE: `approx` is imported from pytest in an earlier test cell.
assert flow3.load() == approx(np.array([[ 0.0111,  0.0111],
       [ 0.184 ,  0.12  ],
       [ 0.12  ,  0.12  ],
       [ 0.17  ,  0.17  ],
       [ 0.17  ,  0.17  ]], dtype=np.float32))

# Positional lookup with the origin and destination positions swapped
# relative to the earlier `iat` calls; the expected DISTWALK values below
# equal the reverse_distance column above.
z = skims.iat(
    omaz=[  0,   1,   3, 101, 102],
    dmaz=[  0,   0,   0, 100, 100],
    _names=['DIST', 'DISTWALK'], _load=True,
)
assert z['DISTWALK'].data == approx(np.array([ 0.0111,  0.12  ,  0.12  ,  0.17  ,  0.17  ]))
assert z['DIST'].data == approx(np.array([ 0.12,  0.12  ,  0.12  ,  0.17  ,  0.17  ]))