**...the next morning**

In [None]:
# Startup databrokers and elastic search
import matplotlib.pyplot as plt
%matplotlib qt5
from pprint import pprint

from rapidz.graph import _clean_text, readable_graph
from xpdan.vend.callbacks.core import Retrieve
from xpdan.vend.callbacks.zmq import Publisher
from xpdconf.conf import glbl_dict

from databroker_elasticsearch import load_elasticindex
from databroker_elasticsearch.brokersearch import BrokerSearch

from databroker import Broker
import yaml
import esconverters

# Build one Broker per config file ('raw.yml' and 'an.yml'), keyed by the
# file stem so later cells can use dbs['raw'] and dbs['an'].
dbs = {}
for yaml_file in ['raw', 'an']:
    with open(f'{yaml_file}.yml', 'r') as f:
        # safe_load: yaml.load() without an explicit Loader is deprecated
        # since PyYAML 5.1 and raises TypeError in PyYAML >= 6.
        dbs[yaml_file] = Broker.from_config(yaml.safe_load(f))

FYI, the objects we have connected are  
1. databroker databases that contain the metadata about the scans
2. elastic-search indexes that have indexed the databrokers and will return just the metadata if queried
3. Broker-search objects that will return the run-start header objects when queried (this is what is needed to run the analysis)

In [None]:
# Pull the two brokers out of the lookup table.
an_db, raw_db = dbs['an'], dbs['raw']
# Load the elastic-search index for each broker from its YAML config file.
raw_es, an_es = load_elasticindex('es-raw.yaml'), load_elasticindex('es-an.yaml')
# Pair each broker with its index so that queries return header objects.
raw_db_es, an_db_es = BrokerSearch(raw_db, raw_es), BrokerSearch(an_db, an_es)

1. Pavol wakes up and wonders how CJ did last night, but CJ is now sleeping soundly in his bed
1. Pavol wants to use elastic search to search the database of collected data and see how CJ did last night
1. He searches for ``tooth`` in any of the metadata fields

In [None]:
# Full-text query on the raw index ('tooth' is searched in all fields);
# show only the sample name stored in each hit.
[
    hit['_source']['sample_name']
    for hit in raw_es.qsearch('tooth')['hits']['hits']
]

1. He finds three datasets, so he knows that CJ had a successful night
1. He checks all the datasets ran to completion
1. He also has other ways to search for the dinosaur tooth

In [None]:
# Calling the BrokerSearch object with a query yields headers; print the
# `stop` attribute of every header matching 'tooth' so each run's
# completion can be inspected.
for hdr in raw_db_es('tooth'):
    print(hdr.stop)

In [None]:
# A bare term carries no field prefix, so it is not tied to one field.
raw_es.qsearch('dinosaur')  # search all fields

In [None]:
# `field:term` syntax limits the match to the named field (here sample_name).
raw_es.qsearch('sample_name:dinosaur')  # search specific field

In [None]:
# Trailing `*` is a wildcard: any term beginning with 'dino' qualifies.
raw_es.qsearch('dino*')  # glob-like search

In [None]:
# `term~N` allows up to N edits, so the misspelling 'dinosaurus' is
# expected to still reach entries containing 'dinosaur'.
raw_es.qsearch('dinosaurus~2')  # fuzzy search max edit distance of 2

In [None]:
# Keep only the first header returned by the fuzzy query; next() raises
# StopIteration if the query matches nothing.
raw_hdr = next(iter(raw_db_es('dinosaurus~2')))
# uid from the header's start document; a later cell uses its first six
# characters to build a `puid:` query against the analysis index.
uid = raw_hdr.start['uid']

1. Pavol also wants to know if CJ was able to do any analysis on the data during the night
1. Pavol searches the databroker that contains analyzed data

In [None]:
# queries with an_es
# NOTE(review): size=0 presumably requests the match count without the hit
# bodies — confirm against the qsearch documentation.
an_es.qsearch('img_sinogram', size=0)

In [None]:
# Same term as the previous cell, this time without the size=0 override.
an_es.qsearch('img_sinogram')

In [None]:
# Wildcard match on the puid field, using the first six characters of the
# raw run's uid as the prefix.
an_es.qsearch(f'puid:{uid[:6]}*')  # word in puid

In [None]:
# Restrict the 'img_sinogram' match to the analysis_stage field.
an_es.qsearch('analysis_stage:img_sinogram')

In [None]:
# Dotted path addresses the nested field usednodes.ndfunc; the leading `*`
# matches any value ending in 'sort_sinogram'.
an_es.qsearch('usednodes.ndfunc:*sort_sinogram')

In [None]:
# Bare-term search for 'gridrec', with no field prefix.
an_es.qsearch('gridrec')

In [None]:
# Same term, restricted to the nested field usednodes.ndkwargs.algorithm.
an_es.qsearch('usednodes.ndkwargs.algorithm:gridrec')

1. Pavol wants to know whether a tomographic reconstruction has already been done.

In [None]:
# query an_es/databroker for tomo recon
# Wildcards on both sides match any analysis_stage value containing 'tomo'.
hdrs = an_db_es('analysis_stage:*tomo*')
# Keep the first matching header; next() raises StopIteration if none match.
tomo_analysis_hdr = next(iter(hdrs))

1. Now Pavol wants to replay the same analysis from the database as a sanity check to see if he gets the same answer.
1. He wants to see exactly what analysis CJ did during the night, so he plots the analysis graph that he found in the database from the analysis done last night.

In [None]:
# load and show the graph
from shed.replay import replay

# Reconstruct the stored analysis from the raw broker and the tomography
# analysis header found earlier.
graph, parents, data, vs = replay(raw_db, tomo_analysis_hdr)

# Give each node a human-readable label derived from its stream, then swap
# in the readable version of the graph.  The labels *should* agree with the
# names shown in the graph plot.
for node_attrs in graph.nodes.values():
    cleaned = _clean_text(str(node_attrs['stream']))
    node_attrs.update(label=cleaned.strip())
graph = readable_graph(graph)

# Render the graph via the image-data input node.
graph.nodes['data img FromEventStream']['stream'].visualize()

1. Each unique analysis has its own unique id.
2. Each unique graph has its own unique id.

In [None]:
# Materialize the gridrec analyses once, then print all analysis ids
# followed by all graph ids (same order as two consecutive loops).
hdrs = list(an_db_es('usednodes.ndkwargs.algorithm:gridrec'))
for label, key in (('analysis id:', 'uid'), ('graph id:', 'graph_hash')):
    for hdr in hdrs:
        print(label, hdr.start[key])

In [None]:
# setup a publisher to send over to data viz and capture
# The publisher targets the 'inbound_proxy_address' entry of the global
# configuration and is created with the b'tomo' prefix.
p = Publisher(glbl_dict['inbound_proxy_address'], prefix=b'tomo')
# Chain LastCache() and DBFriendly() onto the 'img_tomo ToEventStream' node.
# NOTE(review): presumably these make the output replayable and
# serializable for the wire — confirm against the stream library.
z = graph.nodes['img_tomo ToEventStream']['stream'].LastCache().DBFriendly()
# Everything emitted by z is passed to the publisher.
z.starsink(p)

1. As a sanity check, Pavol replays the analysis from last night with no changes

In [None]:
# replay analysis with no changes
# Retrieve is built from the raw broker's handler registry; each stored
# document is passed through it and then pushed into its originating node.
r = Retrieve(dbs['raw'].reg.handler_reg)
for v in vs:
    filled = r(*data[v['uid']])
    parents[v['node']].update(filled)

1. Pavol now changes the reconstruction algorithm to ``algebraic``.  It is the node called ``recon_wrapper`` and he wants the keyword argument ``algorithm`` to be set to ``'art'`` which selects the reconstruction algorithm we want to use.
1. He then reruns the analysis through the new pipeline, which has just changed by one node.

In [None]:
# change to Algebraic Reconstruction technique
# Show the node's keyword arguments before and after switching 'algorithm'.
recon_stream = graph.nodes['starmap; recon_wrapper']['stream']
print(recon_stream.kwargs)
recon_stream.kwargs['algorithm'] = 'art'
print(recon_stream.kwargs)

# replay with changes
# Same replay loop as before, now running through the modified node.
r = Retrieve(dbs['raw'].reg.handler_reg)
for v in vs:
    filled = r(*data[v['uid']])
    parents[v['node']].update(filled)

1. Just because he can, Pavol compares the ID of the previous graph and the new one.  They are different because the graphs are different.

In [None]:
# These hashes are different because the algorithms are different
# Recreate the brokers and search objects before reading the latest runs.
# NOTE(review): presumably this is needed so the freshly written analysis
# run is visible — confirm whether reconnecting is actually required.
dbs = {}
for yaml_file in ['raw', 'an']:
    with open(f'{yaml_file}.yml', 'r') as f:
        # safe_load: yaml.load() without an explicit Loader is deprecated
        # since PyYAML 5.1 and raises TypeError in PyYAML >= 6.
        dbs[yaml_file] = Broker.from_config(yaml.safe_load(f))
from databroker_elasticsearch.converters import register_converter

an_db = dbs['an']
raw_db = dbs['raw']
raw_es = load_elasticindex('es-raw.yaml')
an_es = load_elasticindex('es-an.yaml')
raw_db_es = BrokerSearch(raw_db, raw_es)
an_db_es = BrokerSearch(an_db, an_es)

# graph_hash of the most recent and the second most recent analysis runs.
print(an_db[-1].start['graph_hash'])
print(an_db[-2].start['graph_hash'])

1. Pavol searches elastic search for the art reconstruction data
1. Not surprisingly, Pavol wants to compare the previous analysis to the new one.
1. To do this, he retrieves the last event from each stream and plots them

In [None]:
# an_es for new data (via new recon algo)
# Recreate the brokers and search objects before querying.
# NOTE(review): presumably this is needed so the new 'art' run is visible —
# confirm whether reconnecting is actually required.
dbs = {}
for yaml_file in ['raw', 'an']:
    with open(f'{yaml_file}.yml', 'r') as f:
        # safe_load: yaml.load() without an explicit Loader is deprecated
        # since PyYAML 5.1 and raises TypeError in PyYAML >= 6.
        dbs[yaml_file] = Broker.from_config(yaml.safe_load(f))
from databroker_elasticsearch.converters import register_converter

an_db = dbs['an']
raw_db = dbs['raw']
raw_es = load_elasticindex('es-raw.yaml')
an_es = load_elasticindex('es-an.yaml')
raw_db_es = BrokerSearch(raw_db, raw_es)
an_db_es = BrokerSearch(an_db, an_es)


def vqan(q):
    """Pretty-print the query string together with its analysis-index result."""
    return pprint((q, an_es.qsearch(q)))


# Bare-term search for the new reconstruction algorithm.
an_es.qsearch('art')

In [None]:
# First analysis header found for each reconstruction algorithm; next()
# raises StopIteration if a query matches nothing.
hdr1 = next(iter(an_db_es('usednodes.ndkwargs.algorithm:art')))
hdr2 = next(iter(an_db_es('usednodes.ndkwargs.algorithm:gridrec')))

# next() takes the first 'img_tomo' value yielded from each run's
# 'final_primary' stream.
art = next(hdr1.data('img_tomo', stream_name='final_primary'))
grid = next(hdr2.data('img_tomo', stream_name='final_primary'))

# Compare results: one panel per algorithm plus their difference.  Titles
# are set so the figure can be read without the surrounding code.
fig, axs = plt.subplots(1, 3, tight_layout=True)
for img, title, ax in zip([art, grid], ['art', 'gridrec'], axs):
    ax.imshow(img)
    ax.set_title(title)
# NOTE(review): if the images have an unsigned integer dtype the
# subtraction can wrap around — confirm the dtype of img_tomo.
axs[-1].imshow(art - grid)
axs[-1].set_title('art - gridrec')