# Serialization Builder

After an ontology model is developed, there needs to be a method of uploading data that fits the model in question. 

This data load process is often fed by flat or tree-like datafiles - perhaps simple CSV, XML or JSON documents. 

A serialization helps marshall structured data into a given ontologically defined graph format. 

The serialization has a name, and contains multiple mappings. 

Each mapping describes some link between named attributes in the input file, and their equivalent ontological markup. Some inputs describe entities of a given type, while others describe relations between these entities, and others describe simple data-properties those entities might have. 



In [1]:
import owlready2 as owlr

import xml.etree.ElementTree as ET



In [2]:
sample_onto_file = "../sample_ontology.owl"
#sample_onto_file = "../Serialization.owl"
sample = owlr.get_ontology(sample_onto_file).load()

In [3]:
sample_ser_file = "../Serialization.owl"
serial = owlr.get_ontology(sample_ser_file).load()



In [4]:
sample.base_iri

'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#'

In [5]:
list(serial.properties())

[Serialization.GeneratedOn,
 Serialization.MappingDomain,
 Serialization.MappingLabel,
 Serialization.MappingRange,
 Serialization.MappingSubType,
 Serialization.SerializationLabel,
 Serialization.SerializationParentLabel,
 Serialization.UniqueIdentifier,
 Serialization.MappingMetaTarget,
 Serialization.ContainsMapping,
 Serialization.MappingMemberOf,
 Serialization.MappingMetaTarget]

In [6]:
serial.base_iri

'http://www.semanticweb.org/tomk/ontologies/2022/11/serialization#'

In [7]:
list(sample.classes()), list(sample.object_properties()), list(sample.data_properties())

([sample_ontology.someClass],
 [sample_ontology.someProperty],
 [sample_ontology.someDataProperty])

In [8]:
def generate_meta_definitions(onto):
    meta_defs = []
    for mc in [(serial.MetaClass, onto.classes), (serial.MetaProperty, onto.object_properties), (serial.MetaDataProperty, onto.data_properties)]:
        for c in mc[1]():
            meta_defs.append((c.iri,mc[0].iri))
    return meta_defs
        

In [9]:
generate_meta_definitions(sample)

[('http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someClass',
  'http://www.semanticweb.org/tomk/ontologies/2022/11/serialization#MetaClass'),
 ('http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someProperty',
  'http://www.semanticweb.org/tomk/ontologies/2022/11/serialization#MetaProperty'),
 ('http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someDataProperty',
  'http://www.semanticweb.org/tomk/ontologies/2022/11/serialization#MetaDataProperty')]

In [10]:
def generate_xml_for_serialization(namespaces, ontology):
    X = ET.Element('rdf:RDF')
    default_namespace='http://www.company.com'
    for prefix, uri  in namespaces:
        X.set("xmlns:" + prefix, uri)
        
        
    ## Add block for serialization objects:
    for iri in [serial.Serialization.iri, 
              serial.Mapping.iri,
              serial.MetaClass.iri,             
              serial.MetaProperty.iri,
              serial.MetaDataProperty.iri]:
        q = ET.SubElement(X, "Class")
        q.set("rdf:about", iri)
        
    for iri in [ serial.ContainsMapping.iri,               
                 serial.MappingMetaTarget.iri,
                 serial.MappingDomain.iri,
                 serial.MappingRange.iri,
                 serial.MappingMetaTarget.iri,
                 serial.SerializationLabel.iri,
                 serial.SerializationParentLabel.iri]:
        q = ET.SubElement(X, "AnnotationProperty")
        q.set("rdf:about", iri)
    for iri,tp in generate_meta_definitions(ontology):
        q = ET.SubElement(X, "NamedIndividual")
        q.set("rdf:about", iri)
        p = ET.SubElement(q, "rdf:type")
        p.set("rdf:resource", tp)
        
    ET.indent(X, space="\t", level=0)
    return ET.tostring(X).decode()

In [11]:
def create_mapping(name, target, ttype, properties={}):
    X = ET.Element("NamedIndividual")
    X.set("rdf:about", name)
    q = ET.SubElement(X,"rdf:type")
    q.set("rdf:resource", serial.Mapping.iri)
    q = ET.SubElement(X,"ser:MappingMetaTarget")
    q.set("rdf:resource", target)
    
    for k,v in properties.items():
        q = ET.SubElement(X,k)
        q.text =str(v)
    return X

In [12]:
ns_map = [('',"http://www.w3.org/2002/07/owl#"),
          ('xml:base',"http://www.w3.org/2002/07/owl"),
          ('rdf',"http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
          ('xml:owl',"http://www.w3.org/2002/07/owl#"),
          ('dc',"http://purl.org/dc/elements/1.1/"),
          ('rdfs',"http://www.w3.org/2000/01/rdf-schema#")
         ]
print(generate_xml_for_serialization(ns_map, sample ))

<rdf:RDF xmlns:="http://www.w3.org/2002/07/owl#" xmlns:xml:base="http://www.w3.org/2002/07/owl" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:xml:owl="http://www.w3.org/2002/07/owl#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
	<Class rdf:about="http://www.semanticweb.org/tomk/ontologies/2022/11/serialization#Serialization" />
	<Class rdf:about="http://www.semanticweb.org/tomk/ontologies/2022/11/serialization#Mapping" />
	<Class rdf:about="http://www.semanticweb.org/tomk/ontologies/2022/11/serialization#MetaClass" />
	<Class rdf:about="http://www.semanticweb.org/tomk/ontologies/2022/11/serialization#MetaProperty" />
	<Class rdf:about="http://www.semanticweb.org/tomk/ontologies/2022/11/serialization#MetaDataProperty" />
	<AnnotationProperty rdf:about="http://www.semanticweb.org/tomk/ontologies/2022/11/serialization#ContainsMapping" />
	<AnnotationProperty rdf:about="http://www.semanticweb.org/tomk/ontologies/2022/11/ser

In [13]:
Q = create_mapping("mapping_test", "test" , "class", {"ser:SerializationLabel" : "Class",
                                                      "ser:SerializationParentLabel" : "ParentClass"})
ET.indent(Q, space="\t", level=0)
print(ET.tostring(Q).decode())

<NamedIndividual rdf:about="mapping_test">
	<rdf:type rdf:resource="http://www.semanticweb.org/tomk/ontologies/2022/11/serialization#Mapping" />
	<ser:MappingMetaTarget rdf:resource="test" />
	<ser:SerializationLabel>Class</ser:SerializationLabel>
	<ser:SerializationParentLabel>ParentClass</ser:SerializationParentLabel>
</NamedIndividual>


In [14]:

# Reset and Start again from here. 
# Let's use the json sourced data file to construct the mapping details.
#1. Load config from json serialisation file
#2. Validate sections of the serialisation file using ontology
#3. Build the serialisation rdf file. 



import jsonschema
import json

import owlready2 as owlr

import xml.etree.ElementTree as ET



In [15]:
with open('/home/tomk/Documents/Coding/gitHub/datamodels/serialisation_schema.json', 'r') as file:
    schema = json.load(file)
    
with open('/home/tomk/Documents/Coding/gitHub/datamodels/sample_ser.json', 'r') as file:
    data = json.load(file)


In [16]:
jsonschema.validate(instance=data, schema=schema)

In [17]:
data

{'$schema': 'serialisation_schema.json',
 'serialization_iri': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#Sample_Serialisation',
 'serialization_label': 'Sample Serialisation',
 'targetOntology': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample',
 'targetClasses': ['http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someClass'],
 'targetProperties': ['http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someProperty'],
 'targetDataProperties': ['http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someDataProperty'],
 'serialization_mappings': [{'mapping_name': 'Parent_Class_Mapping',
   'label': 'ParentClass',
   'target': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someClass'},
  {'mapping_name': 'Class_Mapping',
   'label': 'Class',
   'target': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someClass',
   'parent_label': 'ParentClass'},
  {'mapping_name': 'Property_Mapping',
   'target': 'http://www.semanticweb.org/tomk

In [18]:
def create_mapping(name, target, ttype, properties={}):
    X = ET.Element("NamedIndividual")
    X.set("rdf:about", name)
    q = ET.SubElement(X,"rdf:type")
    q.set("rdf:resource", serial.Mapping.iri)
    q = ET.SubElement(X,"ser:MappingMetaTarget")
    q.set("rdf:resource", target)
    
    for k,v in properties.items():
        q = ET.SubElement(X,k)
        q.text =str(v)
    return X

def generate_meta_definitions(onto):
    meta_defs = []
    for mc in [(serial.MetaClass, onto.classes), (serial.MetaProperty, onto.object_properties), (serial.MetaDataProperty, onto.data_properties)]:
        for c in mc[1]():
            meta_defs.append((c.iri,mc[0].iri))
    return meta_defs

def generate_serialization_contents(serialization_label, serialization_iri, mappings):
    X = ET.Element("NamedIndividual")
    X.set("rdf:about", serialization_iri)
    q = ET.SubElement(X,"rdf:type")
    q.set("rdf:resource", serial.Serialization.iri)
    for m in mappings:
        q = ET.SubElement(X,"ser:ContainsMapping")
        q.set("rdf:resource", m[0])
        
    q = ET.SubElement(X,"rdfs:label")
    q.text=serialization_label
    return X
    
        
    
def generate_elemtree_header(serialization_label, serialization_iri, namespaces, ontology, mappings):
    X = ET.Element('rdf:RDF')
    default_namespace='http://www.company.com'
    for prefix, uri  in namespaces:
        X.set("xmlns:" + prefix, uri)
        
    c = ET.Comment("""///////////////////////////////////////////////////////////////////////////////////////
    //
    // Serialization Classes - these should remain static
    // High level serialization classes defined to describe a given Serialization
    //
    ///////////////////////////////////////////////////////////////////////////////////////""") 
    X.append(c)
    ## Add block for serialization objects:
    for iri in [serial.Serialization.iri, 
              serial.Mapping.iri,
              serial.MetaClass.iri,             
              serial.MetaProperty.iri,
              serial.MetaDataProperty.iri]:
        q = ET.SubElement(X, "Class")
        q.set("rdf:about", iri)
    
    
    
    comment = """///////////////////////////////////////////////////////////////////////////////////////
    //
    // Annotation properties - these should remain static
    // These are the internal technical components used by the loading/serialization process
    //
    ///////////////////////////////////////////////////////////////////////////////////////"""
    c = ET.Comment(comment)
    X.append(c)
    
    for iri in [ serial.ContainsMapping.iri,               
                 serial.MappingMetaTarget.iri,
                 serial.MappingDomain.iri,
                 serial.MappingRange.iri,
                 serial.MappingMetaTarget.iri,
                 serial.SerializationLabel.iri,
                 serial.SerializationParentLabel.iri]:
        q = ET.SubElement(X, "AnnotationProperty")
        q.set("rdf:about", iri)
        
    comment = """
    
    ///////////////////////////////////////////////////////////////////////////////////////
    //
    // Individuals - here define all the final target classes and properties 
    // (Using the MetaClass, MetaProperty and MetaDataProperty that will be 
    // referenced by the serialization and populated by individual Mapping elements
    // using the MappingMetaTarget pointer. ) 
    // For any given ontology (or ontologies) this collection identifies and names key
    // Classes, Properties and Data Properties that the Serialization function will populate. 
    //
    ///////////////////////////////////////////////////////////////////////////////////////
     """
    c = ET.Comment(comment)
    X.append(c)
    
    for iri,tp in generate_meta_definitions(ontology):
        q = ET.SubElement(X, "NamedIndividual")
        q.set("rdf:about", iri)
        p = ET.SubElement(q, "rdf:type")
        p.set("rdf:resource", tp)
        
    comment = """
    
    ///////////////////////////////////////////////////////////////////////////////////////
    //
    // Mappings - define all the Mappings that will be collated by this serialisation to
    // pull content from the `flat` recordset and assign it to classes, properties or 
    // data properties as defined in the overarching ontology. 
    //
    ///////////////////////////////////////////////////////////////////////////////////////
     """
    c = ET.Comment(comment)
    X.append(c)
    
    for m in mappings:
        X.append(m[1])
    
    comment = """<!--
    
    ///////////////////////////////////////////////////////////////////////////////////////
    //
    // Serialization - define the named Serialization Object and assign the set of 
    // mappings that belong to that object.
    //
    ///////////////////////////////////////////////////////////////////////////////////////
     -->"""
    c = ET.Comment(comment)
    X.append(c)
    
    X.append(generate_serialization_contents(serialization_label, serialization_iri, mappings))
    
    ET.indent(X, space="\t", level=0)
    return X



def find_match(value, match_set_list):
    for e,t_set in enumerate(match_set_list):
        if value in t_set:
            return e
    return None
        
sample_ser_file = "../Serialization.owl"
serial = owlr.get_ontology(sample_ser_file).load()

def process_serialisation(json_data):
    s_label = json_data['serialization_label']
    s_iri = json_data['serialization_iri']
    s_onto = json_data['targetOntology']
    _fix_me_ontology_file = "../sample_ontology.owl"
    ontol = owlr.get_ontology(_fix_me_ontology_file).load()
    uri_base = ontol.base_iri
    
    ns_map = [('',"http://www.w3.org/2002/07/owl#"),
              ('xml:base',"http://www.w3.org/2002/07/owl"),
              ('rdf',"http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
              ('xml:owl',"http://www.w3.org/2002/07/owl#"),
              ('dc',"http://purl.org/dc/elements/1.1/"),
              ('rdfs',"http://www.w3.org/2000/01/rdf-schema#"), 
              ('ser', serial.base_iri)
             ]

    o_classes = [c.iri for c in ontol.classes()]
    o_properties =[p.iri for p in ontol.object_properties()]
    o_dataproperties = [d.iri for d in ontol.data_properties()]
    t_classes, t_properties, t_data_properties = json_data['targetClasses'], json_data['targetProperties'], json_data['targetDataProperties']
    if all([c in o_classes for c in t_classes]):
        print("classes ok")
    else:
        print(set(o_classes).symmetric_difference(set(t_classes)))
        
    if all([c in o_properties for c in t_properties]):
        print("properties ok")
    else:
        print(set(o_properties).symmetric_difference(set(t_properties)))
        
        
    if all([d in o_dataproperties for d in t_data_properties]):
        print("data properties ok")
    else:
        print(set(o_dataproperties).symmetric_difference(set(t_data_properties)))
    #print (t_classes, t_properties, t_data_properties)
    mapping_list = []
    for mapping in json_data['serialization_mappings']:
        t_match=find_match(mapping['target'],[t_classes, t_properties, t_data_properties])
        print(mapping, t_match)
        mapping_name = "".join([uri_base, mapping['mapping_name']])
        match t_match:
            case 0:
                # Classes
                properties = {k:v for k,v in {"ser:SerializationLabel" : mapping.get("label"),
                              "ser:SerializationParentLabel" : mapping.get("parent_label")}.items() if v is not None}
                
                mapping_list.append((mapping_name, create_mapping(mapping_name, mapping['target'], t_match, properties)))
            case 1:
                # Properties
                properties = {k:v for k,v in {"ser:MappingDomain" : mapping.get("domain"),
                              "ser:MappingRange" : mapping.get("range")}.items() if v is not None}
                mapping_list.append((mapping_name,create_mapping(mapping_name, mapping['target'], t_match, properties)))
                
            case 2:
                # Data Properties
                properties = {k:v for k,v in {"ser:MappingDomain" : mapping.get("domain"),
                              "ser:MappingRange" : mapping.get("range")}.items() if v is not None}
                mapping_list.append((mapping_name,create_mapping(mapping_name, mapping['target'], t_match, properties)))
                
            case None:
                # No Match
                print("**********************************************")
                print("**                                          **")
                print("**         This mapping not matched         **")
                print("**                                          **")
                print("**********************************************")
                print(mapping)
                print("**********************************************")
                print()
                print()
                assert False
    
    X = generate_elemtree_header(s_label, s_iri, ns_map, ontol, mapping_list)
                
    return X

In [19]:
r = process_serialisation(data)

classes ok
properties ok
data properties ok
{'mapping_name': 'Parent_Class_Mapping', 'label': 'ParentClass', 'target': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someClass'} 0
{'mapping_name': 'Class_Mapping', 'label': 'Class', 'target': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someClass', 'parent_label': 'ParentClass'} 0
{'mapping_name': 'Property_Mapping', 'target': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someProperty', 'domain': 'Class', 'range': 'Property'} 1
{'mapping_name': 'DataProperty_Mapping', 'target': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someDataProperty', 'domain': 'Class', 'range': 'DataProperty'} 2


In [24]:
ET.indent(r, space="\t", level=0)
comment = """<!--
Sample Schema Name
-->\n"""
xml_text = """<?xml version='1.0' encoding='utf-8'?>\n""" + comment + ET.tostring(r).decode()
print(xml_text)

<?xml version='1.0' encoding='utf-8'?>
<!--
Sample Schema Name
-->
<rdf:RDF xmlns:="http://www.w3.org/2002/07/owl#" xmlns:xml:base="http://www.w3.org/2002/07/owl" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:xml:owl="http://www.w3.org/2002/07/owl#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:ser="http://www.semanticweb.org/tomk/ontologies/2022/11/serialization#">
	<!--///////////////////////////////////////////////////////////////////////////////////////
    //
    // Serialization Classes - these should remain static
    // High level serialization classes defined to describe a given Serialization
    //
    ///////////////////////////////////////////////////////////////////////////////////////-->
	<Class rdf:about="http://www.semanticweb.org/tomk/ontologies/2022/11/serialization#Serialization" />
	<Class rdf:about="http://www.semanticweb.org/tomk/ontologies/2022/11/serialization#Mapping" />
	<Class rdf:about="h

In [21]:
assert False

AssertionError: 

In [None]:
import ipywidgets as widgets

https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20List.html

In [None]:
%matplotlib inline
from ipywidgets import interactive
import matplotlib.pyplot as plt
import numpy as np

def f(m, b):
    plt.figure(2)
    x = np.linspace(-10, 10, num=1000)
    plt.plot(x, m * x + b)
    plt.ylim(-5, 5)
    plt.show()

interactive_plot = interactive(f, m=(-2.0, 2.0), b=(-3, 3, 0.5))
output = interactive_plot.children[-1]
output.layout.height = '450px'
interactive_plot

In [None]:
from ipywidgets import IntProgress
from IPython.display import display
import time

max_count = 100

f = IntProgress(min=0, max=max_count) # instantiate the bar
display(f) # display the bar

count = 0
while count <= max_count:
    f.value += 1 # signal to increment the progress bar
    time.sleep(.01)
    count += 1

In [None]:
caption = widgets.Label(value='Changes in source values are reflected in target1')
source, target1 = widgets.IntSlider(description='Source'),\
                  widgets.IntSlider(description='Target 1')
dl = widgets.dlink((source, 'value'), (target1, 'value'))
display(caption, source, target1)


In [None]:
data_labels = ["ParentClass", "Class", "Property", "DataProperty"]


_user_mapping_name = widgets.Text(
    value='Mapping_Name',
    placeholder='Enter name of the mapping',
    description='String:',
    disabled=False   
)

mapping_target_options = {p[0][p[0].find("#")+1:]:p for p in generate_meta_definitions(sample)}

_user_mapping_target = widgets.Combobox(
    # value='John',
    placeholder='Choose Target',
    options=list(mapping_target_options.keys()),
    description='Combobox:',
    ensure_option=True,
    disabled=False
)

_user_label_or_domain = widgets.Combobox(
    # value='John',
    placeholder='Enter Label or Domain',
    options=list(data_labels),
    description='Combobox:',
    ensure_option=True,
    disabled=False
)

_user_range = widgets.Combobox(
    # value='John',
    placeholder='Enter Range',
    options=list(data_labels),
    description='Combobox:',
    ensure_option=True,
    disabled=False
)




def f(a,b,c):
    print( str((a,b,c)))

def get_ttype(umt):
    return mapping_target_options[umt]
    


out_text = widgets.Text(
    value="test",
    placeholder='Type something',
    description='String:',
    disabled=False   
)

input_widgets_list = [_user_mapping_name, _user_mapping_target, _user_label_or_domain, _user_range]

#out = interactive(create_mapping, **{"name" : _user_mapping_name.value, "target" : _user_mapping_target.value, "ttype" : _user_label_or_domain.value,  "properties" : {}})

widgets.HBox([widgets.VBox(input_widgets_list)])


In [None]:
out

In [None]:
test = widgets.interactive_output(create_mapping, {'name': _user_mapping_name, 'target': _user_mapping_target, 'ttype': _user_label_or_domain})
test

In [None]:
generate_meta_definitions(sample)[0][0].find("#")

In [None]:
a = widgets.IntSlider(description='a')
b = widgets.IntSlider(description='b')
c = widgets.IntSlider(description='c')
def f(a, b, c):
    print('{}*{}*{}={}'.format(a, b, c, a*b*c))

out = widgets.interactive_output(f, {'a': a, 'b': b, 'c': c})

widgets.HBox([widgets.VBox([a, b, c]), out])