In [1]:
import glob
import pandas as pd
from prov.model import ProvDocument, ProvEntity, ProvAssociation, ProvGeneration, ProvUsage, ProvActivity
from prov.dot import prov_to_dot

In [2]:
# Read PROV model from RDF file
def readProv(filename):
    """Load a PROV document from the Turtle file '<filename>.ttl'.

    Parameters
    ----------
    filename : str
        Path of the file *without* the '.ttl' extension.

    Returns
    -------
    Whatever ``ProvDocument.deserialize(source=..., format='rdf')`` returns
    for the opened file.
    """
    # Turtle files are UTF-8 encoded; be explicit so that reading does not
    # depend on the platform's default text encoding.
    with open("%s.ttl" % (filename,), 'r', encoding='utf-8') as f:
        return ProvDocument.deserialize(source=f, format='rdf')

In [3]:
# Create SVG from PROV model
def prov2svg(prov_doc, svg_filename):
    """Plot ``prov_doc`` into the file '<svg_filename>.svg'.

    The extension is appended here, so ``svg_filename`` must be given
    without it. The rendering itself is delegated to ``prov_doc.plot``.
    """
    target_path = '%s.svg' % (svg_filename,)
    prov_doc.plot(filename=target_path)

In [4]:
# Create DOT from PROV model
def prov2dot(prov_doc, dot_filename):
    """Convert ``prov_doc`` to a DOT graph and write it to '<dot_filename>.dot'.

    Parameters
    ----------
    prov_doc
        The PROV document handed to ``prov_to_dot``.
    dot_filename : str
        Output path *without* the '.dot' extension.

    Returns
    -------
    The graph object produced by ``prov_to_dot``, so that callers can
    inspect or reuse it (previously the function returned ``None``).
    """
    graph = prov_to_dot(prov_doc)
    graph.write('%s.dot' % (dot_filename,))
    return graph

In [5]:
# Extract entities from PROV model
def prov2entities_csv(prov_doc, csv_filename):
    """Export the entity and activity records of ``prov_doc`` to CSV.

    One row is written per record returned by
    ``prov_doc.get_records((ProvEntity, ProvActivity))`` with the columns

    * ``type``      -- class name of the record,
    * ``ID``        -- ``str(record.identifier)``,
    * ``label``     -- values of the 'prov:label' attribute joined by "; ",
    * ``file_type`` -- values of the 'da:fileType' attribute joined by "; ".

    Rows are sorted by ``type`` and ``ID`` and written (including the
    DataFrame index) to '<csv_filename>_entities.csv'. A document without
    matching records yields a file that only contains the header line.
    """
    columns = ['type', 'ID', 'label', 'file_type']

    def join_values(values):
        # Values are not guaranteed to be plain strings, so stringify them
        # (as the relations export does for roles). Sorting keeps the output
        # stable if the attribute values come back in arbitrary order.
        return "; ".join(sorted(str(value) for value in values))

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and appending row by row is quadratic anyway.
    rows = []
    for r in prov_doc.get_records((ProvEntity, ProvActivity)):
        rows.append({
            'type': r.__class__.__name__,
            'ID': str(r.identifier),
            'label': join_values(r.get_attribute('prov:label')),
            'file_type': join_values(r.get_attribute('da:fileType')),
        })

    # Passing the columns explicitly keeps sort_values working when there
    # are no rows at all.
    df = pd.DataFrame(rows, columns=columns)
    df.sort_values(by=['type', 'ID']).to_csv("%s_entities.csv" % (csv_filename,))

In [6]:
# Extract relations from PROV model
def prov2rel_csv(prov_doc, csv_filename):
    """Export association, generation and usage relations of ``prov_doc`` to CSV.

    One row is written per record returned by
    ``prov_doc.get_records((ProvAssociation, ProvGeneration, ProvUsage))``:

    * ``type``                  -- class name of the relation record,
    * ``com1_type``/``com1_ID`` -- name and value of the first formal attribute,
    * ``com2_type``/``com2_ID`` -- name and value of the second formal attribute,
    * ``role``                  -- values of 'prov:role' joined by "; ".

    Rows are sorted by ``type``, ``com1_ID`` and ``com2_ID`` and written
    (including the DataFrame index) to '<csv_filename>_relations.csv'. A
    document without matching records yields a header-only file.
    """
    columns = ['type', 'com1_type', 'com1_ID', 'com2_type', 'com2_ID', 'role']

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and appending row by row is quadratic anyway.
    rows = []
    for r in prov_doc.get_records((ProvAssociation, ProvGeneration, ProvUsage)):
        # formal_attributes is indexed as a sequence of (name, value) pairs;
        # only the first two pairs (the two related components) are exported.
        first, second = r.formal_attributes[0], r.formal_attributes[1]
        rows.append({
            'type': r.__class__.__name__,
            'com1_type': str(first[0]),
            'com1_ID': str(first[1]),
            'com2_type': str(second[0]),
            'com2_ID': str(second[1]),
            # Sorted so the output is stable if roles come back unordered.
            'role': "; ".join(sorted(str(role) for role in r.get_attribute('prov:role'))),
        })

    # Passing the columns explicitly keeps sort_values working when there
    # are no rows at all.
    df = pd.DataFrame(rows, columns=columns)
    df.sort_values(by=['type', 'com1_ID', 'com2_ID']).to_csv("%s_relations.csv" % (csv_filename,))

In [7]:
# Compute PROV models
# Optional exports. DOT files are always written; flip a flag to True to
# additionally produce the corresponding output for every model.
WRITE_SVG = False
WRITE_ENTITIES_CSV = False
WRITE_RELATIONS_CSV = False

files = glob.glob("model-based/*.ttl")
files += glob.glob("model-based/pattern/*.ttl")
for file in files:
    # Skip superseded models (any path containing '_old').
    if '_old' in file:
        continue
    # Both glob patterns end in ".ttl", so drop exactly that suffix. A plain
    # str.replace would also strip ".ttl" occurring elsewhere in the path.
    n = file[:-len(".ttl")]
    print("Computing '%s'..." % (n,), end='')
    pdoc = readProv(n)
    if WRITE_SVG:
        prov2svg(pdoc, n)
    prov2dot(pdoc, n)
    if WRITE_ENTITIES_CSV:
        prov2entities_csv(pdoc, n)
    if WRITE_RELATIONS_CSV:
        prov2rel_csv(pdoc, n)
    print("OK")

Computing 'model-based/geometry'...OK
Computing 'model-based/context_models'...OK
Computing 'model-based/simulation_models'...OK
Computing 'model-based/pattern/extract-information'...OK
Computing 'model-based/pattern/generation'...OK
Computing 'model-based/pattern/refinement'...OK
Computing 'model-based/pattern/composition'...OK
Computing 'model-based/pattern/parameterisation'...OK
None
