In [4]:
from copy import deepcopy

import rdflib as rdf
from rdflib import OWL, RDF, RDFS

from typing import List, Tuple, Optional, Any
Triple = Tuple[Any, Any, Any]



In [5]:
g = rdf.Graph()

In [35]:
g.parse('../paml/paml.ttl', format='turtle')

<Graph identifier=N2029fcab25f848a2bbd4b39663c16a83 (<class 'rdflib.graph.Graph'>)>

In [36]:
q = """
    PREFIX owl: <%s>

    SELECT ?rel
    {
        ?_p a owl:Restriction ;
            ?rel ?_x .
    }
"""%rdf.OWL
q

'\n    PREFIX owl: <http://www.w3.org/2002/07/owl#>\n\n    SELECT ?rel\n    {\n        ?_p a owl:Restriction ;\n            ?rel ?_x .\n    }\n'

In [37]:
for r in g.query(q):
    print(r["rel"])

http://www.w3.org/2002/07/owl#onProperty
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://www.w3.org/2002/07/owl#minCardinality
http://www.w3.org/2002/07/owl#maxCardinality
http://www.w3.org/2002/07/owl#allValuesFrom
http://www.w3.org/2002/07/owl#onProperty
http://www.w3.org/2002/07/owl#minCardinality
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://www.w3.org/2002/07/owl#onProperty
http://www.w3.org/2002/07/owl#minCardinality
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://www.w3.org/2002/07/owl#onProperty
http://www.w3.org/2002/07/owl#allValuesFrom
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://www.w3.org/2002/07/owl#onProperty
http://www.w3.org/2002/07/owl#allValuesFrom
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://www.w3.org/2002/07/owl#onProperty
http://www.w3.org/2002/07/owl#maxCardinality
http://www.w3.org/2002/07/owl#onProperty
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://www.w3.org/20

In [38]:
rels = [OWL.allValuesFrom, OWL.minCardinality, OWL.maxCardinality, RDF.type]
ignore = [OWL.onProperty, RDFS.comment]

In [39]:
q = """
    PREFIX owl: <%s>

    SELECT ?r
    {
        ?r a owl:Restriction .
    }
"""%rdf.OWL


restrs = [r["r"] for r in g.query(q)]
restrs

[rdflib.term.BNode('ub8bL232C5'),
 rdflib.term.BNode('ub9bL490C4'),
 rdflib.term.BNode('ub8bL549C9'),
 rdflib.term.BNode('ub8bL548C9'),
 rdflib.term.BNode('ub8bL319C9'),
 rdflib.term.BNode('ub8bL550C9'),
 rdflib.term.BNode('ub9bL273C5'),
 rdflib.term.BNode('ub8bL640C5'),
 rdflib.term.BNode('ub9bL130C5'),
 rdflib.term.BNode('ub8bL383C5'),
 rdflib.term.BNode('ub8bL376C5'),
 rdflib.term.BNode('ub8bL621C9'),
 rdflib.term.BNode('ub8bL112C9'),
 rdflib.term.BNode('ub8bL405C5'),
 rdflib.term.BNode('ub8bL518C5'),
 rdflib.term.BNode('ub9bL355C13'),
 rdflib.term.BNode('ub8bL243C5'),
 rdflib.term.BNode('ub8bL349C5'),
 rdflib.term.BNode('ub8bL373C5'),
 rdflib.term.BNode('ub8bL664C5'),
 rdflib.term.BNode('ub9bL435C29'),
 rdflib.term.BNode('ub9bL156C5'),
 rdflib.term.BNode('ub8bL83C20'),
 rdflib.term.BNode('ub8bL367C9'),
 rdflib.term.BNode('ub8bL334C5'),
 rdflib.term.BNode('ub8bL137C9'),
 rdflib.term.BNode('ub8bL622C9'),
 rdflib.term.BNode('ub8bL377C5'),
 rdflib.term.BNode('ub8bL98C5'),
 rdflib.term.

In [40]:
def is_bad_restr(restr, graph) -> bool :
    rrs = set()
    for _r, rel, _x in graph.triples((restr, None, None)):
        if rel in rels:
            rrs.add(rel)
    assert len(rrs) > 0, f"No components to restriction {restr}"
    return len(rrs) > 1
    

In [41]:
bad = [x for x in restrs if is_bad_restr(x, g)]
bad

[rdflib.term.BNode('ub8bL232C5'),
 rdflib.term.BNode('ub9bL490C4'),
 rdflib.term.BNode('ub8bL549C9'),
 rdflib.term.BNode('ub8bL548C9'),
 rdflib.term.BNode('ub8bL319C9'),
 rdflib.term.BNode('ub8bL550C9'),
 rdflib.term.BNode('ub9bL273C5'),
 rdflib.term.BNode('ub8bL640C5'),
 rdflib.term.BNode('ub9bL130C5'),
 rdflib.term.BNode('ub8bL383C5'),
 rdflib.term.BNode('ub8bL376C5'),
 rdflib.term.BNode('ub8bL621C9'),
 rdflib.term.BNode('ub8bL112C9'),
 rdflib.term.BNode('ub8bL405C5'),
 rdflib.term.BNode('ub8bL518C5'),
 rdflib.term.BNode('ub9bL355C13'),
 rdflib.term.BNode('ub8bL243C5'),
 rdflib.term.BNode('ub8bL349C5'),
 rdflib.term.BNode('ub8bL373C5'),
 rdflib.term.BNode('ub8bL664C5'),
 rdflib.term.BNode('ub9bL435C29'),
 rdflib.term.BNode('ub9bL156C5'),
 rdflib.term.BNode('ub8bL83C20'),
 rdflib.term.BNode('ub8bL367C9'),
 rdflib.term.BNode('ub8bL334C5'),
 rdflib.term.BNode('ub8bL137C9'),
 rdflib.term.BNode('ub8bL622C9'),
 rdflib.term.BNode('ub8bL377C5'),
 rdflib.term.BNode('ub8bL98C5'),
 rdflib.term.

In [42]:
def describe_bad_restr(b, g: rdf.Graph):
    nsm = rdf.namespace.NamespaceManager(g)
    nsm.bind('owl', OWL)
    triples = g.triples((b, None, None))
    for x, _y, z in g.triples((b, RDF.type, None)):
        print("%s a %s"%(x, z))
    for _, y, z in triples:
        if y != RDF.type:
            print("\t%s %s"%(nsm.normalizeUri(y), nsm.normalizeUri(z)))

In [43]:
describe_bad_restr(bad[0], g)


ub8bL232C5 a http://www.w3.org/2002/07/owl#Restriction
	owl:onProperty uml:firstEventValue
	owl:minCardinality <1>
	owl:maxCardinality <1>
	owl:allValuesFrom uml:OrderedPropertyValue


In [44]:
def translate_bad_restr(b, g):
    nsm = rdf.namespace.NamespaceManager(g)
    nsm.bind('owl', OWL)
    triples = g.triples((b, None, None))
    types = [z for _x, _y, z in g.triples((b, RDF.type, None))]
    props = [z for _x, _y, z in g.triples((b, OWL.onProperty, None))]
    assert len(props) == 1
    prop = props[0]

In [45]:

def translate_bad_restr(b, g):
    comment: Optional[Any] = None
    new_bnodes: List[rdf.BNode] = []
    to_add: List[Triple] = []
    to_delete: List[Triple] = \
        [tuple for tuple in g.triples((b, None, None))]
    nsm = rdf.namespace.NamespaceManager(g)
    nsm.bind('owl', OWL)
    def normalize(x):
        return nsm.normalizeUri(x)
    def find_children():
        child_triples=g.triples((None, RDFS.subClassOf, b))
        children = [x for x, _, _ in child_triples]
        assert len(children) >= 1
        return children
    triples = g.triples((b, None, None))
    types = [z for _x, _y, z in g.triples((b, RDF.type, None))]
    props = [z for _x, _y, z in g.triples((b, OWL.onProperty, None))]
    comments = [z for _x, _y, z in g.triples((b, RDFS.comment, None))]
    assert len(props) == 1
    assert len(comments) <= 1
    assert len(types) == 1
    prop = props[0]
    if comments:
        comment = comments[0]

    for _, y, z in triples:
        if y not in set(ignore) | {RDF.type}:
            bnode = rdf.BNode()
            new_bnodes.append(bnode)
            print(f"{nsm.normalizeUri(bnode)} a {nsm.normalizeUri(types[0])} ;")
            to_add.append((bnode, RDF.type, types[0]))
            print(f"\towl:onProperty {nsm.normalizeUri(prop)} ;")
            to_add.append((bnode, OWL.onProperty, prop))
            print(f"\t{nsm.normalizeUri(y)} {nsm.normalizeUri(z)}", end='')
            to_add.append((bnode, y, z))
            if comment:
                print(f"\n\trdfs:comment {nsm.normalizeUri(comment)} .")
                to_add.append((bnode, RDFS.comment, comment))
            else:
                print(".")
    print("Children of this restriction are:")
    for x in find_children():
        print(f"\t{x}")
        print(f"\tRemove {normalize(x)} rdfs:subClassOf {normalize(b)}")
        to_delete.append((x, RDFS.subClassOf, b))
        for nb in new_bnodes:
            print(f"\t{x} {nsm.normalizeUri(RDFS.subClassOf)} {nsm.normalizeUri(nb)}")
            to_add.append((x, RDFS.subClassOf, nb))

    return to_add, to_delete
        


In [46]:
translate_bad_restr(bad[0], g)

<Ne501e8f57b054977af0bc6276ae6faa2> a owl:Restriction ;
	owl:onProperty uml:firstEventValue ;
	owl:minCardinality <1>.
<N107f89530b1c4672b69da6831047c8c3> a owl:Restriction ;
	owl:onProperty uml:firstEventValue ;
	owl:maxCardinality <1>.
<Neda0f48169cb416c8b37bd070f336ed1> a owl:Restriction ;
	owl:onProperty uml:firstEventValue ;
	owl:allValuesFrom uml:OrderedPropertyValue.
Children of this restriction are:
	http://bioprotocols.org/uml#TimeObservation
	Remove uml:TimeObservation rdfs:subClassOf <ub8bL232C5>
	http://bioprotocols.org/uml#TimeObservation rdfs:subClassOf <Ne501e8f57b054977af0bc6276ae6faa2>
	http://bioprotocols.org/uml#TimeObservation rdfs:subClassOf <N107f89530b1c4672b69da6831047c8c3>
	http://bioprotocols.org/uml#TimeObservation rdfs:subClassOf <Neda0f48169cb416c8b37bd070f336ed1>


([(rdflib.term.BNode('Ne501e8f57b054977af0bc6276ae6faa2'),
   rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
   rdflib.term.URIRef('http://www.w3.org/2002/07/owl#Restriction')),
  (rdflib.term.BNode('Ne501e8f57b054977af0bc6276ae6faa2'),
   rdflib.term.URIRef('http://www.w3.org/2002/07/owl#onProperty'),
   rdflib.term.URIRef('http://bioprotocols.org/uml#firstEventValue')),
  (rdflib.term.BNode('Ne501e8f57b054977af0bc6276ae6faa2'),
   rdflib.term.URIRef('http://www.w3.org/2002/07/owl#minCardinality'),
   rdflib.term.Literal('1', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#nonNegativeInteger'))),
  (rdflib.term.BNode('N107f89530b1c4672b69da6831047c8c3'),
   rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
   rdflib.term.URIRef('http://www.w3.org/2002/07/owl#Restriction')),
  (rdflib.term.BNode('N107f89530b1c4672b69da6831047c8c3'),
   rdflib.term.URIRef('http://www.w3.org/2002/07/owl#onProperty'),
   rdflib.term.URIRef('http:/

In [47]:
bad[0]

rdflib.term.BNode('ub8bL232C5')

In [48]:
rdf.namespace.NamespaceManager(g).normalizeUri(bad[0])

'<ub8bL232C5>'

In [49]:
def repair_all_bad_restrictions(bad: List[rdf.BNode], g: rdf.Graph):
    all_adds: List[Triple] = []
    all_deletes: List[Triple] = []
    for x in bad:
        to_add, to_delete = translate_bad_restr(x, g)
        all_adds += to_add
        all_deletes += to_delete
    for x in all_adds:
        g.add(x)
    for x in all_deletes:
        g.remove(x)
    return g
        

In [50]:
for x in g.triples((None, None, None)):
    print(x)
    break

(rdflib.term.BNode('ub8bL232C5'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('http://www.w3.org/2002/07/owl#Restriction'))


In [51]:
new_g = deepcopy(g)

repair_all_bad_restrictions(bad, new_g)

Type of container used for storing the samples. The size and dimension may not match that of the array: it is up to execution to lay out the array in one or more  does not look like a valid URI, trying to serialize this will break.
Type of container used for storing the samples. The size and dimension may not match that of the array: it is up to execution to lay out the array in one or more  does not look like a valid URI, trying to serialize this will break.
Type of container used for storing the samples. The size and dimension may not match that of the array: it is up to execution to lay out the array in one or more  does not look like a valid URI, trying to serialize this will break.
N-dimensional array of URI for specification or  does not look like a valid URI, trying to serialize this will break.
N-dimensional array of URI for specification or  does not look like a valid URI, trying to serialize this will break.
N-dimensional array of URI for specification or  does not look like 

<N31f7179b3e96467e94b942076fa6356a> a owl:Restriction ;
	owl:onProperty uml:firstEventValue ;
	owl:minCardinality <1>.
<Nc31c87f93dce4e978dae168f741ac463> a owl:Restriction ;
	owl:onProperty uml:firstEventValue ;
	owl:allValuesFrom uml:OrderedPropertyValue.
<N1791620d9a0b46da958add2c90ce7b1d> a owl:Restriction ;
	owl:onProperty uml:firstEventValue ;
	owl:maxCardinality <1>.
Children of this restriction are:
	http://bioprotocols.org/uml#TimeObservation
	Remove uml:TimeObservation rdfs:subClassOf <ub8bL232C5>
	http://bioprotocols.org/uml#TimeObservation rdfs:subClassOf <N31f7179b3e96467e94b942076fa6356a>
	http://bioprotocols.org/uml#TimeObservation rdfs:subClassOf <Nc31c87f93dce4e978dae168f741ac463>
	http://bioprotocols.org/uml#TimeObservation rdfs:subClassOf <N1791620d9a0b46da958add2c90ce7b1d>
<Ne470706150654787aa57a2711c4642e4> a owl:Restriction ;
	owl:onProperty paml:queryString ;
	owl:minCardinality <1>.
Children of this restriction are:
	http://bioprotocols.org/paml#ContainerSpec
	R

<Graph identifier=N2029fcab25f848a2bbd4b39663c16a83 (<class 'rdflib.graph.Graph'>)>

In [54]:
g.serialize(destination='paml-original.nt')

In [52]:
new_g.serialize(destination='paml-revised.nt')

In [53]:
new_g.serialize(destination='paml-revised.ttl', format='turtle')

# Now fix the UML file

In [55]:
g = rdf.Graph().parse('../uml/uml.ttl', format='turtle')
g.serialize(destination='uml-original.nt')

In [56]:
q = """
    PREFIX owl: <%s>

    SELECT ?r
    {
        ?r a owl:Restriction .
    }
"""%rdf.OWL


restrs = [r["r"] for r in g.query(q)]
restrs

[rdflib.term.BNode('ub10bL378C5'),
 rdflib.term.BNode('ub10bL164C5'),
 rdflib.term.BNode('ub10bL661C5'),
 rdflib.term.BNode('ub10bL519C5'),
 rdflib.term.BNode('ub10bL642C5'),
 rdflib.term.BNode('ub10bL550C9'),
 rdflib.term.BNode('ub10bL439C5'),
 rdflib.term.BNode('ub10bL126C9'),
 rdflib.term.BNode('ub10bL459C5'),
 rdflib.term.BNode('ub10bL621C9'),
 rdflib.term.BNode('ub10bL517C5'),
 rdflib.term.BNode('ub10bL198C5'),
 rdflib.term.BNode('ub10bL240C5'),
 rdflib.term.BNode('ub10bL205C5'),
 rdflib.term.BNode('ub10bL374C5'),
 rdflib.term.BNode('ub10bL406C5'),
 rdflib.term.BNode('ub10bL441C5'),
 rdflib.term.BNode('ub10bL541C9'),
 rdflib.term.BNode('ub10bL112C9'),
 rdflib.term.BNode('ub10bL99C5'),
 rdflib.term.BNode('ub10bL298C5'),
 rdflib.term.BNode('ub10bL113C9'),
 rdflib.term.BNode('ub10bL570C5'),
 rdflib.term.BNode('ub10bL150C9'),
 rdflib.term.BNode('ub10bL518C5'),
 rdflib.term.BNode('ub10bL276C5'),
 rdflib.term.BNode('ub10bL607C9'),
 rdflib.term.BNode('ub10bL460C5'),
 rdflib.term.BNode('u

In [57]:
bad = [x for x in restrs if is_bad_restr(x, g)]
bad

[rdflib.term.BNode('ub10bL378C5'),
 rdflib.term.BNode('ub10bL164C5'),
 rdflib.term.BNode('ub10bL661C5'),
 rdflib.term.BNode('ub10bL519C5'),
 rdflib.term.BNode('ub10bL642C5'),
 rdflib.term.BNode('ub10bL550C9'),
 rdflib.term.BNode('ub10bL439C5'),
 rdflib.term.BNode('ub10bL126C9'),
 rdflib.term.BNode('ub10bL459C5'),
 rdflib.term.BNode('ub10bL621C9'),
 rdflib.term.BNode('ub10bL517C5'),
 rdflib.term.BNode('ub10bL198C5'),
 rdflib.term.BNode('ub10bL240C5'),
 rdflib.term.BNode('ub10bL205C5'),
 rdflib.term.BNode('ub10bL374C5'),
 rdflib.term.BNode('ub10bL406C5'),
 rdflib.term.BNode('ub10bL441C5'),
 rdflib.term.BNode('ub10bL541C9'),
 rdflib.term.BNode('ub10bL112C9'),
 rdflib.term.BNode('ub10bL99C5'),
 rdflib.term.BNode('ub10bL298C5'),
 rdflib.term.BNode('ub10bL113C9'),
 rdflib.term.BNode('ub10bL570C5'),
 rdflib.term.BNode('ub10bL150C9'),
 rdflib.term.BNode('ub10bL518C5'),
 rdflib.term.BNode('ub10bL276C5'),
 rdflib.term.BNode('ub10bL607C9'),
 rdflib.term.BNode('ub10bL460C5'),
 rdflib.term.BNode('u

In [58]:
new_g = deepcopy(g)

repair_all_bad_restrictions(bad, new_g)

<Naf4c82bae83341e8ba473c94e418e685> a owl:Restriction ;
	owl:onProperty uml:isUnique ;
	owl:allValuesFrom xsd:boolean.
Children of this restriction are:
	http://bioprotocols.org/uml#Parameter
	Remove uml:Parameter rdfs:subClassOf <ub10bL378C5>
	http://bioprotocols.org/uml#Parameter rdfs:subClassOf <Naf4c82bae83341e8ba473c94e418e685>
<Na556454fff23485098a4b26b8214f20c> a owl:Restriction ;
	owl:onProperty uml:referenceValue ;
	owl:allValuesFrom sbol:Identified.
<Naf9f95a092ea47a282418765ec7c923b> a owl:Restriction ;
	owl:onProperty uml:referenceValue ;
	owl:minCardinality <1>.
<N41e67d7b45c645cfab16d8093e27d107> a owl:Restriction ;
	owl:onProperty uml:referenceValue ;
	owl:maxCardinality <1>.
Children of this restriction are:
	http://bioprotocols.org/uml#LiteralReference
	Remove uml:LiteralReference rdfs:subClassOf <ub10bL164C5>
	http://bioprotocols.org/uml#LiteralReference rdfs:subClassOf <Na556454fff23485098a4b26b8214f20c>
	http://bioprotocols.org/uml#LiteralReference rdfs:subClassOf <

<Graph identifier=N4b054f7e0a6f437cb224cde002193bbe (<class 'rdflib.graph.Graph'>)>

In [59]:
g.serialize(destination='uml-original.nt')

In [60]:
new_g.serialize(destination='uml-revised.nt')

In [61]:
new_g.serialize(destination='uml-revised.ttl', format='turtle')