In [1]:
import pandas as pd
import owlready2 as owlr 
# Owlready2 docs: https://owlready2.readthedocs.io/ (legacy Owlready 1 docs: https://pythonhosted.org/Owlready/)
import rdflib
from collections import Counter
import uuid



In [2]:
# Open two SQLite-backed Owlready2 worlds ("raw" and "clean") and load
# "datamodels.owl" into each one.
raw_world = owlr.World(filename='raw_world.sqlite3')
rawdms = raw_world.get_ontology("datamodels.owl").load()
clean_world = owlr.World(filename='clean_world.sqlite3')
cleandms = clean_world.get_ontology("datamodels.owl").load()
# `onto` is an alias for the ontology loaded in raw_world; the cells visible
# below operate on `onto`.
onto = rawdms

In [3]:
# Gather every individual and data property of the ontology, pair each with
# its first declared parent (is_a.first()), and keep the entities whose first
# parent is onto.Serialization.
onto_individuals = list(onto.individuals())
onto_dataproperties = list(onto.data_properties())
individuals = [
    (entity, entity.is_a.first())
    for entity in onto_individuals + onto_dataproperties
]
serializations = [
    entity for entity, first_parent in individuals
    if first_parent == onto.Serialization
]

# Print each serialization followed by its mappings and their parent classes.
for s in serializations:
    print(s)
    mappings = list(s.SerializationContainsMapping)
    for m in mappings:
        print("\t", m, m.is_a)

datamodels.DomainModelClassEntityRecord
	 datamodels.AttributeMapping [datamodels.Mapping]
	 datamodels.ClassMapping [datamodels.Mapping]
	 datamodels.DomainMapping [datamodels.Mapping]
	 datamodels.ModelMapping [datamodels.Mapping]
	 datamodels.hasAttributeMapping [datamodels.Mapping]
	 datamodels.hasClassMapping [datamodels.Mapping]
	 datamodels.hasDataTypeMapping [datamodels.Mapping]
	 datamodels.hasModelMapping [datamodels.Mapping]
	 datamodels.hasModelTypeMapping [datamodels.Mapping]


In [4]:
# Run the reasoner over raw_world inside the `onto` context; the captured
# output below shows this launching HermiT.
with onto:
    owlr.sync_reasoner(raw_world)

* Owlready2 * Running HermiT...
    java -Xmx2000M -cp /usr/local/python/py36/lib/python3.6/site-packages/owlready2/hermit:/usr/local/python/py36/lib/python3.6/site-packages/owlready2/hermit/HermiT.jar org.semanticweb.HermiT.cli.CommandLine -c -O -D -I file:////var/folders/rd/q3bhk6_n56x75m05pp2sq7dr0000gn/T/tmpde4zk1hv
* Owlready2 * HermiT took 1.0635490417480469 seconds
* Owlready * Equivalenting: datamodels.PartialIdentifier datamodels.r_PartialIdentifier
* Owlready * Equivalenting: datamodels.r_PartialIdentifier datamodels.PartialIdentifier
* Owlready * Equivalenting: datamodels.Identifier datamodels.r_Identifier
* Owlready * Equivalenting: datamodels.r_Identifier datamodels.Identifier
* Owlready * Reparenting datamodels.r_PartialIdentifier: {owl.ObjectProperty} => {datamodels.Attribute}
* Owlready * Reparenting datamodels.Identifier: {owl.DatatypeProperty} => {datamodels.Attribute}
* Owlready * Reparenting datamodels.IdentityClassParentModel: {datamodels.IdentityParent, owl.Object

In [5]:
# Pair every class and property that has at least one MappingKey with its
# list of keys (classes first, then properties).
mapped_things = [
    (entity, entity.MappingKey)
    for lister in (onto.classes, onto.properties)
    for entity in lister()
    if len(entity.MappingKey) > 0
]
# Lookup table: first MappingKey of each entity -> the entity itself.
mappable_things = {keys[0]: entity for entity, keys in mapped_things}
# Sanity check: no MappingKey value is used by more than one entity/slot.
assert all([
    occurrences == 1
    for occurrences in Counter(
        [key for _, keys in mapped_things for key in keys]
    ).values()
])
del mapped_things
mappable_things

{'Attribute': datamodels.Attribute,
 'Class': datamodels.Class,
 'Context': datamodels.Context,
 'DataModel': datamodels.DataModel,
 'Mapping': datamodels.Mapping,
 'Domain': datamodels.ModelDomain,
 'Relationship': datamodels.Relationship,
 'Serialization': datamodels.Serialization,
 'hasDataType': datamodels.DataType,
 'hasModelType': datamodels.ModelTypeCode,
 'hasAttribute': datamodels.ClassHasAttribute,
 'isAttributeParent': datamodels.IdentityAttributeParentClass,
 'isClassParent': datamodels.IdentityClassParentModel,
 'isRelationshipParent': datamodels.IdentityRelationshipParentModel,
 'hasClass': datamodels.ModelContainsClass,
 'hasModel': datamodels.ModelDomainContainsModel}

In [6]:
# One sample input record. The keys are the serialization labels that the
# mapping cell further down uses to index this dict (data_row[s_key]).
data_row = dict([
    ("Business Domain", "Core"),
    ("Model", "Principal Interactions"),
    ("ModelType", "Conceptual"),
    ("Entity", "Person"),
    ("Attribute", "PersonID"),
    ("DataType", "string"),
])

In [7]:
def instantiate_thing(label, tclass, space, test_for_existing=True):
    """Return an individual of ``tclass`` in ``space`` that carries ``label``.

    When ``test_for_existing`` is true, the results of
    ``space.search(type=tclass)`` are scanned and the first one whose
    ``label`` contains ``label`` is returned as-is.  If nothing matches (or
    the check is disabled) a new individual is created inside ``with space:``
    under a random UUID-hex name, its ``label`` is set to ``label``, and it is
    returned.
    """
    candidates = space.search(type=tclass) if test_for_existing else ()
    reusable = [found for found in candidates if label in found.label]
    if reusable:
        return reusable[0]

    # Nothing to reuse: mint a fresh individual under a generated unique name.
    fresh_name = uuid.uuid4().hex
    with space:
        created = tclass(fresh_name, namespace=space)
    created.label = label
    return created
    

In [8]:
def instantiate_property(subject_key, subject_domain, predicate, object_key, object_domain, space):
    """Relate a subject individual to an object (or literal) through ``predicate``.

    Parameters
    ----------
    subject_key : label value or None
        When given, only instances of ``subject_domain`` whose ``label``
        contains it are candidate subjects; ``None`` accepts every instance.
    subject_domain : class
        Class (as yielded by ``space.classes()``) whose instances are the
        candidate subjects.
    predicate : property
        Property to set.  It is addressed on the subject through its own
        ``python_name``; the property itself is never renamed.
    object_key : label value or None
        Selects candidate objects by label in the same way as ``subject_key``.
        When no object individual is found, this value itself is stored as a
        literal.
    object_domain : class or other value
        Class whose instances are the candidate objects.  A value that equals
        no class in ``space`` simply yields no candidates.
    space : ontology
        Provides ``classes()``.

    Returns
    -------
    tuple
        ``(subjects, predicate, objects, value)`` where ``value`` is what was
        assigned or appended on the first subject.

    Raises
    ------
    IndexError
        If no subject individual matches.
    """
    def _labelled_instances(domain, key):
        # Instances of each class in `space` equal to `domain`, optionally
        # restricted to those whose label contains `key`.
        return [
            individual
            for cls in space.classes()
            if cls == domain
            for individual in cls.instances()
            if key is None or key in individual.label
        ]

    c_s = _labelled_instances(subject_domain, subject_key)
    c_o = _labelled_instances(object_domain, object_key)

    if not c_s:
        raise IndexError(
            "no instance of {d!r} matches subject key {k!r}".format(
                d=subject_domain, k=subject_key))

    subject = c_s[0]
    # Use the property's real Python attribute name instead of temporarily
    # renaming the property, which used to leave python_name set to "".
    attribute = predicate.python_name
    # Functionality is a question about the class that holds the property,
    # i.e. the subject's class, not about the object being linked.
    functional = predicate.is_functional_for(subject.__class__)

    if c_o:
        # Link to the first matching individual; non-functional properties
        # take a list of values.
        prop_relation = c_o[0] if functional else [c_o[0]]
        setattr(subject, attribute, prop_relation)
    else:
        # No individual matched: store the key itself as a literal value.
        prop_relation = object_key
        if functional:
            setattr(subject, attribute, prop_relation)
        else:
            getattr(subject, attribute).append(prop_relation)

    return c_s, predicate, c_o, prop_relation

In [9]:
#ont = cleandms
#ont = onto
#q = instantiate_thing("test", onto.Class, cleandms, True)
#q = instantiate_thing("test 2", onto.Class, onto, True)

#for i in ont.Class.instances():
#    print (i.iri, i.label)

In [10]:
#for i in onto.Class.instances():
#    print (i.iri, i.label)

In [11]:
# Translate `data_row` into ontology content, driven by the mappings attached
# to each serialization:
#   1. build one descriptor dict per mapping, grouped by serialization label;
#   2. instantiate an individual for every "entity" descriptor;
#   3. create the property links for every "property" descriptor.
s_objects = []
p_objects = []
# Descriptors keyed by serialization label.  Created once, before the loop, so
# that steps 2 and 3 see the mappings of every serialization (not only the
# last one) and still work when there are no serializations at all.
serials = {}
for s in serializations:
    print(s)
    mappings = [m for m in s.SerializationContainsMapping]
    for m in mappings:
        s_key = m.SerializationLabel.first()
        i_key = m.MappingLabel.first()

        serials.setdefault(s_key, [])

        print ("\t", m, s_key)

        if m.is_a.first() == onto.EntityMapping:
            # Entity mapping: the row value becomes the label of an
            # individual of the mapped class.
            print("\t\tEntity:{e}".format(e=m))
            o_range = m.MappingRange.first()
            property_d = { "type" : "entity",
                           "mapping_name" : i_key,
                           "property" : "rdf:type:",
                           "range" :    mappable_things[o_range],
                           "subject_label" : data_row[s_key]}
            serials[s_key].append(property_d)

        elif m.is_a.first() == onto.PropertyMapping:
            # Property mapping: the row value selects (or becomes) the object
            # of the mapped property.  A range with no entry in
            # mappable_things is passed through unchanged.
            print("\t\tProperty:{e}".format(e=m))
            s_domain = m.MappingDomain.first()
            o_range = m.MappingRange.first()
            property_d = { "type" : "property",
                           "mapping_name" : i_key,
                           "domain" :   mappable_things[s_domain],
                           "property" : mappable_things[i_key],
                           "range" :    mappable_things.get(o_range,o_range),
                           "range_search_value" : data_row[s_key]}
            serials[s_key].append(property_d)

        else:
            # Explicit raise instead of `assert False`, so the failure names
            # the offending mapping and is not stripped under `python -O`.
            raise AssertionError(
                "unsupported mapping type {t} for serialization label {k!r}".format(
                    t=m.is_a, k=s_key))

print()

# Instantiate temporary versions of all the in-row objects
for v in serials.values():
    for u in v:
        print(u)
        if u['type'] == "entity":
            with onto:
                s_objects.append(instantiate_thing(u['subject_label'], u['range'], onto, True))
            print(u)

print()

# Create property links for all identified in-row objects
for v in serials.values():
    for u in v:
        if u['type'] == "property":

            p_objects.append(u)
            instantiate_property(None, u['domain'], u['property'], u["range_search_value"], u['range'], onto)


datamodels.DomainModelClassEntityRecord
	 datamodels.AttributeMapping Attribute
		Entity:datamodels.AttributeMapping
	 datamodels.ClassMapping Entity
		Entity:datamodels.ClassMapping
	 datamodels.DomainMapping Business Domain
		Entity:datamodels.DomainMapping
	 datamodels.ModelMapping Model
		Entity:datamodels.ModelMapping
	 datamodels.hasAttributeMapping Attribute
		Property:datamodels.hasAttributeMapping
	 datamodels.hasClassMapping Entity
		Property:datamodels.hasClassMapping
	 datamodels.hasDataTypeMapping DataType
		Property:datamodels.hasDataTypeMapping
	 datamodels.hasModelMapping Model
		Property:datamodels.hasModelMapping
	 datamodels.hasModelTypeMapping ModelType
		Property:datamodels.hasModelTypeMapping

{'type': 'entity', 'mapping_name': 'Attribute', 'property': 'rdf:type:', 'range': datamodels.Attribute, 'subject_label': 'PersonID'}
{'type': 'entity', 'mapping_name': 'Attribute', 'property': 'rdf:type:', 'range': datamodels.Attribute, 'subject_label': 'PersonID'}
{'type': 

AttributeError: 'str' object has no attribute 'storid'

In [20]:
# List (is_a, label) for every individual currently in `onto`.
[(i.is_a, i.label) for i in list(onto.individuals())]

[([datamodels.EntityMapping], []),
 ([datamodels.EntityMapping], []),
 ([datamodels.EntityMapping], []),
 ([datamodels.Serialization], []),
 ([datamodels.EntityMapping], []),
 ([datamodels.PropertyMapping], []),
 ([datamodels.PropertyMapping], []),
 ([datamodels.PropertyMapping], []),
 ([datamodels.PropertyMapping], []),
 ([datamodels.PropertyMapping], []),
 ([datamodels.Attribute], ['PersonID']),
 ([datamodels.Class], ['Person']),
 ([datamodels.ModelDomain], ['Core']),
 ([datamodels.DataModel], ['Principal Interactions'])]

In [None]:
# Show the instances of the Class, Attribute, DataModel and ModelDomain classes (displayed as one tuple).
onto.Class.instances(), onto.Attribute.instances(), onto.DataModel.instances(), onto.ModelDomain.instances()

In [None]:
#onto.Attribute.instances()[0].AttributeHasDataType=["string"]

In [None]:
# Relations reported as possible for the first Attribute instance.
# NOTE(review): _get_instance_possible_relations is underscore-prefixed, i.e. presumably a private Owlready2 API.
list(onto.Attribute.instances()[0]._get_instance_possible_relations())

In [None]:
# Attribute names available on the first DataModel instance.
dir(onto.DataModel.instances()[0])

In [14]:
# Properties reported by get_properties() for the first DataModel instance.
onto.DataModel.instances()[0].get_properties()

{datamodels.ModelContainsClass,
 datamodels.ModelTypeCode,
 rdf-schema.label,
 datamodels.ModelIsMemberOfModelDomain}

In [13]:
# Give the ModelTypeCode property the Python attribute name "mtc", then use
# that attribute to assign ["Conceptual"] on the first DataModel instance.
# NOTE(review): this cell shows In [13] but sits after In [14], so the saved
# outputs were produced out of order — re-check under Restart & Run All.
onto.ModelTypeCode.python_name = "mtc"
onto.DataModel.instances()[0].mtc=["Conceptual"]

In [15]:
# Read the "mtc" attribute of the first DataModel instance.
onto.DataModel.instances()[0].mtc

['Conceptual']

In [None]:
# Run the reasoner over raw_world again, inside the `onto` context.
with onto:
    owlr.sync_reasoner(raw_world)

In [None]:
# Instances of the first class in `onto` that equals the domain of the first collected property descriptor.
[c for c in list(onto.classes()) if c==p_objects[0]['domain']][0].instances()

In [None]:
# Ask whether the first collected property is functional for its 'range' entry.
# NOTE(review): is_functional_for presumably expects the class that holds the
# property (the domain side) — confirm that the range is the intended argument.
p_objects[0]['property'].is_functional_for(p_objects[0]['range'])

In [None]:
# Re-run instantiate_property by hand for the first collected property descriptor (no subject key filter).
instantiate_property(None, p_objects[0]['domain'], p_objects[0]['property'], p_objects[0]['range_search_value'], p_objects[0]['range'], onto)

In [None]:
# (individual, is_a, label) for every object collected in s_objects.
[(s,s.is_a, s.label) for s in s_objects]

In [None]:
# (individual, iri, is_a, label) for every instance of onto.Class.
[(c,c.iri, c.is_a, c.label) for c in onto.Class.instances()]

In [None]:
# Always raises AssertionError — presumably a deliberate stop so that "Run All"
# halts before the scratch cells below; TODO confirm or remove.
assert False

In [None]:
# Descriptors recorded under the 'Entity' serialization label.
serials['Entity']

In [None]:
# Run the reasoner on raw_world, the world `onto` was loaded into. Called with
# no argument, sync_reasoner targets Owlready2's default world instead, which
# is not where this notebook's ontology lives; passing raw_world also matches
# the other reasoner cells in this notebook.
with onto:
    owlr.sync_reasoner(raw_world)

In [None]:
# Display the result of get_properties() called on the ontology object itself.
onto.get_properties()

In [None]:
# Classes the ontology reports as inconsistent (presumably only meaningful after a reasoner run — confirm).
list(onto.inconsistent_classes())

In [None]:
# Print every class in `onto` together with its instances.
for c in list(onto.classes()):
    print (c, c.instances())