In [286]:
import pandas as pd

# tx view as seen by mongo
def _tx_record(signal, layout, version, path):
    """Build one transactional document in the shape stored for a signal.

    The (signal, layout, version) triple is the primary key; ``path`` is the
    ordered list of node names from the root of that layout's tree.
    """
    return {
        "signal": signal,    # pri_key
        "layout": layout,    # pri_key
        "version": version,  # pri_key
        "path": list(path),
    }


tx_view = [
    _tx_record("context_001", "location", 1, ["sites", "site_1", "assets", "asset_1"]),
    _tx_record("context_123", "location", 1, ["sites", "site_1", "assets", "asset_1"]),
    _tx_record("context_123", "process", 1, ["process_1", "step_1", "role_1"]),
    _tx_record("context_124", "location", 1, ["sites", "site_1", "assets", "asset_2"]),
    _tx_record("context_124", "measurement", 1, ["units", "newton"]),
    _tx_record("context_125", "process", 1, ["process_1", "step_1", "role_1"]),
    _tx_record("context_125", "process", 2, ["process_1", "step_2", "role_1"]),
    _tx_record("context_125", "measurement", 1, ["units", "mm"]),
]

tx_view.extend([
    # add a new signal entry that changes the tree shape
    _tx_record("context_301", "location", 1, ["other_sites", "site_x", "assets", "asset_1"]),
    # add a new version of an existing entry which changes the tree shape
    _tx_record("context_123", "location", 2, ["sites", "site_1", "assets", "asset_1", "parts", "part_1"]),
])


# make the reporting snapshot view when changes happen
def flatten_path(obj):
    """Return a flat copy of a record with ``path`` expanded into level columns.

    Each element of ``obj['path']`` becomes a ``level_<i>`` entry (``i`` is the
    element's position in the path) and the ``path`` key itself is left out of
    the result. All other keys are carried over in their original order.

    The input mapping is NOT modified: a new dict is returned, so the source
    records stay intact and the function can be applied to the same record
    more than once.

    Args:
        obj: mapping that contains a ``path`` key holding an iterable of
            path components.

    Returns:
        A new dict with every non-``path`` key of ``obj`` followed by the
        ``level_<i>`` keys.

    Raises:
        KeyError: if ``obj`` has no ``path`` key.
    """
    # Copy everything except the nested path so the caller's record is untouched.
    flat = {key: value for key, value in obj.items() if key != "path"}
    for i, pc in enumerate(obj["path"]):
        flat[f"level_{i}"] = pc
    return flat
    
# One row per (signal, layout, version) record, path spread over level_<i> columns.
df = pd.DataFrame.from_records([flatten_path(record) for record in tx_view])
# maxver: highest version present for the row's (signal, layout) pair,
# repeated on every row of that pair so it can be compared with 'version'.
key_cols = ['signal','layout']
df['maxver'] = df.groupby(key_cols)['version'].transform('max')

# use the reporting snapshot view to answer questions

def _show_tree(layout_name, level_cols):
    """Display, for one layout, the row count and highest ``maxver`` per tree node.

    Rows of ``df`` whose ``layout`` equals ``layout_name`` are grouped by
    ``level_cols``. ``child_count`` is the number of rows in each group, so
    every stored version of a signal is counted.
    """
    layout_rows = df[df['layout'] == layout_name]
    per_node = layout_rows.groupby(level_cols).agg({"signal":"size","maxver":"max"})
    display(per_node.rename(columns={"signal":"child_count"}))


print("all hierarchy data")
display(df)

print("latest snapshot of 'process' hierarchy")
is_process = df['layout'] == 'process'
is_latest = df['maxver'] == df['version']  # row carries the newest version of its key
display(df[is_process & is_latest])

print("signals with units of newtons")
is_measurement = df['layout'] == 'measurement'
is_newton = df['level_1'] == "newton"
display(df[is_measurement & is_newton])

print("location tree")
_show_tree("location", ["level_0","level_1","level_2","level_3"])

print("process tree")
_show_tree("process", ["level_0","level_1","level_2"])

print("measurement tree")
_show_tree("measurement", ["level_0","level_1"])

all hierarchy data


Unnamed: 0,signal,layout,version,level_0,level_1,level_2,level_3,level_4,level_5,maxver
0,context_001,location,1,sites,site_1,assets,asset_1,,,1
1,context_123,location,1,sites,site_1,assets,asset_1,,,2
2,context_123,process,1,process_1,step_1,role_1,,,,1
3,context_124,location,1,sites,site_1,assets,asset_2,,,1
4,context_124,measurement,1,units,newton,,,,,1
5,context_125,process,1,process_1,step_1,role_1,,,,2
6,context_125,process,2,process_1,step_2,role_1,,,,2
7,context_125,measurement,1,units,mm,,,,,1
8,context_301,location,1,other_sites,site_x,assets,asset_1,,,1
9,context_123,location,2,sites,site_1,assets,asset_1,parts,part_1,2


latest snapshot of 'process' hierarchy


Unnamed: 0,signal,layout,version,level_0,level_1,level_2,level_3,level_4,level_5,maxver
2,context_123,process,1,process_1,step_1,role_1,,,,1
6,context_125,process,2,process_1,step_2,role_1,,,,2


signals with units of newtons


Unnamed: 0,signal,layout,version,level_0,level_1,level_2,level_3,level_4,level_5,maxver
4,context_124,measurement,1,units,newton,,,,,1


location tree


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,child_count,maxver
level_0,level_1,level_2,level_3,Unnamed: 4_level_1,Unnamed: 5_level_1
other_sites,site_x,assets,asset_1,1,1
sites,site_1,assets,asset_1,3,2
sites,site_1,assets,asset_2,1,1


process tree


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,child_count,maxver
level_0,level_1,level_2,Unnamed: 3_level_1,Unnamed: 4_level_1
process_1,step_1,role_1,2,2
process_1,step_2,role_1,1,2


measurement tree


Unnamed: 0_level_0,Unnamed: 1_level_0,child_count,maxver
level_0,level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
units,mm,1,1
units,newton,1,1
