In [7]:
#!/usr/bin/env python3
import sys
import re
from rdflib import Graph, Namespace, RDF, SH


def split_relation_suffix(local_name, class_name=None):
    """
    Split 'isProcessedByOrganisation' into ('isProcessedBy','Organisation'),
    either by matching the class_name suffix or by camel-case.
    """
    if class_name and local_name.endswith(class_name):
        return local_name[:-len(class_name)], class_name
    m = re.match(r'(.+?)([A-Z][a-z0-9].*)$', local_name)
    return m.groups() if m else (local_name, '')


def build_new_suffix(path_local, min_c, max_c, class_local=None):
    """
    For class constraints: <rel>Max<max><Class>
    For datatype: if min==max: has<min><CapitalizedPath>
                  else: hasMin<min>Max<max><CapitalizedPath>
    """
    if class_local:
        rel, _ = split_relation_suffix(path_local, class_local)
        return f"{rel}Max{max_c}{class_local}"
    cap = path_local[0].upper() + path_local[1:]
    return (
        f"has{min_c}{cap}"
        if min_c == max_c
        else f"hasMin{min_c}Max{max_c}{cap}"
    )


def build_renaming_map(infile):
    """
    Parses `infile`, identifies GUID-shaped PropertyShapes and builds a renaming map.
    Returns a tuple (renames, all_guids).
    """
    g = Graph()
    g.parse(infile, format="turtle")

    OM = Namespace("http://data.boehringer.com/ontology/omics#")
    renames = {}
    all_guids = []

    for node in g.subjects(RDF.type, SH.NodeShape):
        if not str(node).startswith(str(OM)):
            continue
        shape_name = str(node).split("#")[-1]

        for ps in g.objects(node, SH.property):
            if not str(ps).startswith(str(OM)):
                continue
            guid = str(ps).split("#")[-1]
            if re.fullmatch(r"[0-9a-f\-]{36}", guid) is None:
                continue
            all_guids.append(guid)

            path_uri   = g.value(ps, SH.path)
            min_lit    = g.value(ps, SH.minCount)
            max_lit    = g.value(ps, SH.maxCount)
            class_uri  = g.value(ps, SH["class"])

            if path_uri is None:
                continue

            min_c = int(str(min_lit)) if min_lit else 0
            max_c = int(str(max_lit)) if max_lit else 0
            if min_c == 0 and max_c == 0 and class_uri is None:
                continue

            path_local  = str(path_uri).split("#")[-1]
            class_local = str(class_uri).split("#")[-1] if class_uri else None

            suffix = build_new_suffix(path_local, min_c, max_c, class_local)
            new_name = f"{shape_name}-{suffix}"
            renames[str(ps)] = OM[new_name]

    return renames, all_guids


def apply_renames(infile, outfile, renames):
    """
    Applies `renames` map to `infile` graph and writes to `outfile`.
    """
    g = Graph()
    g.parse(infile, format="turtle")

    # Replace URIs in-place
    for old_str, new in renames.items():
        old = g.namespace_manager.expand_curie(old_str)[1] if old_str.startswith("<") else old_str
        subj_node = None
        # find the Node for old URI
        for s in g.subjects(None, None):
            if str(s) == old_str or str(s) == str(new):
                subj_node = s
                break
        # simpler: operate directly on URIRefs
        from rdflib import URIRef
        old_uri = URIRef(old_str)
        for p, o in g.predicate_objects(old_uri):
            g.add((new, p, o))
        for s, p in g.subject_predicates(old_uri):
            g.add((s, p, new))
        g.remove((old_uri, None, None))
        g.remove((None, None, old_uri))

    g.serialize(destination=outfile, format="turtle")
    print(f"Wrote renamed shapes to {outfile}")



In [None]:

def main():
    # build map from primary model
    primary = "omics_model.ttl"
    renames, all_guids = build_renaming_map(primary)

    # apply to each file
    files = ["omics_model.ttl", "violations.ttl", "instances.ttl"]
    for fname in files:
        outfile = fname.replace('.ttl', '_renamed.ttl')
        apply_renames(fname, outfile, renames)

    # report unrenamed GUIDs
    renamed_guids = set(renames.keys())
    unrenamed = [g for g in all_guids if g not in renamed_guids]
    if unrenamed:
        sys.stderr.write("Could not rename the following GUID-shaped property shapes:\n")
        for u in unrenamed:
            sys.stderr.write(f"- {u}\n")

if __name__ == '__main__':
    main()


Wrote renamed shapes to omics_model_renamed.ttl
Wrote renamed shapes to violations_renamed.ttl
Wrote renamed shapes to instances_renamed.ttl


Could not rename the following GUID-shaped property shapes:
- 46804d2a-95e6-4416-9001-7563bf14c333
- d08a7d6a-6153-4f62-89bb-bf837925595a
- dd50121f-225a-4708-b006-4be0fc2952a7
- c31b9ba0-ef42-4289-9699-27ae6bb0c7d9
- fb0b28d4-e160-47ad-9f33-cf699c89d565
- 3f7b14c8-d5d4-4a77-b9a8-31c664cdb11c
- ece15faa-30b3-471c-ae51-2d6d1f80e3a9
- 010a00fe-8dae-43ae-bbc8-313601f12465
- 56baf6a6-d66e-4de6-a492-dae542427534


: 