In [244]:
import pandas as pd

# tx view
tx_view = {
    "context_123": {
        "location": [{"version": "1", "path": ["sites","site_1","assets","asset_1"]} ],
        "process": [{"version": "1", "path": ["process_1","step_1","role_1"] } ],
    },
    "context_124": {
        "location": [{"version": "1", "path": ["sites","site_1","assets","asset_2"] } ],
        "measurement": [{"version": "1", "path": ["unit","newton"] } ],
    },
    "context_125": {
        "process": [
            {"version": "1", "path": ["process_1","step_1","role_2"] },
            {"version": "2", "path": ["process_1","step_2","role_1"] },
        ],
        "measurement": [{"version": "1", "path": ["unit","newton"] }],
    },
}

# extend the hierarchy by inserting a new signal as v1
# could make these versions keys instead of vals inside an array if it makes
# transaction handling easier
tx_view["context_201"] = { "location": [ {"version":1,"path":["sites","site_2","assets","asset_1"]} ]}

# update the hierarchy by inserting a new version of a signal
tx_view["context_123"]["process"].append({"version":"2","path":["process_1","step_3","role_1"]})


# make the reporting snapshot view when changes happen
def flatten_item(ctx,obj):
    records = list()
    for hier,vers in obj.items():        
        for version in vers:
            fields = {
                "context":ctx,
                "hierarchy": hier,
                "version": version['version'],
            }
            for i,pc in enumerate(version['path']):
                fields[f"level_{i}"] = pc 
            records.append(fields)
    return records

df = pd.DataFrame.from_records([item for sublist in [flatten_item(k,v) for k,v in tx_view.items()] for item in sublist])
df['maxver'] = df.groupby(['context','hierarchy'])['version'].transform('max')

# use the reporting snapshot view to answer questions

print("all hierarchy data")
display(df)

print("latest snapshot of 'process' hierarchy")
display(df[(df['hierarchy'] == 'hierarchy_2') & (df['maxver'] == df['version'])])

print("signals with units of newtons")
display(df[(df['hierarchy'] == 'measurement') & (df['level_1'] == "newton")])

print("child folders of sites")
display(pd.DataFrame(df[df['hierarchy'] == 'location']['level_1'].value_counts()))

print("tree for a hierarchy")
df[df['hierarchy'] == "location"].groupby(["level_0","level_1","level_2","level_3"]).agg({"context":"size","maxver":"max"})

all hierarchy data


Unnamed: 0,context,hierarchy,version,level_0,level_1,level_2,level_3,maxver
0,context_123,location,1,sites,site_1,assets,asset_1,1
1,context_123,process,1,process_1,step_1,role_1,,2
2,context_123,process,2,process_1,step_3,role_1,,2
3,context_124,location,1,sites,site_1,assets,asset_2,1
4,context_124,measurement,1,unit,newton,,,1
5,context_125,process,1,process_1,step_1,role_2,,2
6,context_125,process,2,process_1,step_2,role_1,,2
7,context_125,measurement,1,unit,newton,,,1
8,context_201,location,1,sites,site_2,assets,asset_1,1


latest snapshot of 'process' hierarchy


Unnamed: 0,context,hierarchy,version,level_0,level_1,level_2,level_3,maxver


signals with units of newtons


Unnamed: 0,context,hierarchy,version,level_0,level_1,level_2,level_3,maxver
4,context_124,measurement,1,unit,newton,,,1
7,context_125,measurement,1,unit,newton,,,1


child folders of sites


Unnamed: 0,level_1
site_1,2
site_2,1


tree for a hierarchy


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,context,maxver
level_0,level_1,level_2,level_3,Unnamed: 4_level_1,Unnamed: 5_level_1
sites,site_1,assets,asset_1,1,1
sites,site_1,assets,asset_2,1,1
sites,site_2,assets,asset_1,1,1


In [246]:
print("tree for a hierarchy")
df[df['hierarchy'] == "process"].groupby(["level_0","level_1","level_2"]).agg({"context":"size","maxver":"max"})

tree for a hierarchy


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,context,maxver
level_0,level_1,level_2,Unnamed: 3_level_1,Unnamed: 4_level_1
process_1,step_1,role_1,1,2
process_1,step_1,role_2,1,2
process_1,step_2,role_1,1,2
process_1,step_3,role_1,1,2
