# Serialization Builder

Once an ontology model has been developed, we need a way to load data that conforms to that model.

This data-load process is often fed by flat or tree-like data files, such as simple CSV, XML or JSON documents.

A serialization helps marshal structured data into a given ontologically defined graph format.

The serialization has a name, and contains multiple mappings. 

Each mapping describes a link between a named attribute in the input file and its equivalent ontological markup. Some inputs describe entities of a given type, others describe relations between those entities, and others describe simple data properties that those entities might have.



In [1]:

# Reset and start again from here.
# Let's use the JSON-sourced data file to construct the mapping details.
# 1. Load config from the JSON serialisation file
# 2. Validate sections of the serialisation file using the ontology
# 3. Build the serialisation RDF file.



import jsonschema
import json

#import owlready2 as owlr

import xml.etree.ElementTree as ET



In [2]:
# Put the serialization package directory on sys.path so that modules in it
# can be imported from this notebook.
import os
import sys

module_path = os.path.abspath('../src/models/core/serialization')
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)
print(sys.path)
import serialization_builder as s_b

['/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/home/tomk/environment/devpy/lib/python3.10/site-packages', '/home/tomk/Documents/Coding/gitHub/datamodels/src/models/core/serialization']




In [6]:
# Build the RDF/XML serialization for the data-mapping model and save it.
# The input path is relative to the notebook's working directory, like the
# output path below, so the cell does not depend on one user's home directory.
schemafilename = '../src/models/in-development/datamapping_ser.json'

t = s_b.process_json_serialization(schemafilename)

# `t` is a single string (see the printed output), so it is written in one
# call instead of character by character through writelines(). The generated
# XML declares encoding utf-8, so the file is opened with that encoding
# rather than the platform default.
with open("../src/models/in-development/datamapping_ser.rdf", "w", encoding="utf-8") as f:
    f.write(t)

print(t)

[]
[<Element <function Comment at 0x7fb61830b0a0> at 0x7fb61c39ac50>, <Element 'NamedIndividual' at 0x7fb61d1142c0>, <Element <function Comment at 0x7fb61830b0a0> at 0x7fb61d10e840>, <Element 'Class' at 0x7fb61d10f6a0>, <Element 'Class' at 0x7fb61d10f150>, <Element 'Class' at 0x7fb61d10f240>, <Element 'Class' at 0x7fb61d10ef70>, <Element 'Class' at 0x7fb61d10e2a0>, <Element 'Class' at 0x7fb61d10e200>, <Element 'Class' at 0x7fb61d10dfd0>, <Element 'Class' at 0x7fb61d10f740>, <Element <function Comment at 0x7fb61830b0a0> at 0x7fb61d10f8d0>, <Element 'AnnotationProperty' at 0x7fb61d10f380>, <Element 'AnnotationProperty' at 0x7fb61d10f1f0>, <Element 'AnnotationProperty' at 0x7fb61d10fa60>, <Element 'AnnotationProperty' at 0x7fb61d10f470>, <Element 'AnnotationProperty' at 0x7fb61d10e1b0>, <Element 'AnnotationProperty' at 0x7fb61d10e160>, <Element 'AnnotationProperty' at 0x7fb61d10dcb0>, <Element 'AnnotationProperty' at 0x7fb61d10dd50>, <Element 'AnnotationProperty' at 0x7fb61d10df80>, <Elem

'<?xml version=\'1.0\' encoding=\'utf-8\'?>\n<!--\n        Sample Schema Name\n        -->\n<rdf:RDF xmlns="http://www.w3.org/2002/07/owl#" xml:base="http://www.w3.org/2002/07/owl" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:ser="http://www.tkltd.org/ontologies/serialization#">\n\t<!--   \n    ///////////////////////////////////////////////////////////////////////////////////////\n    //\n    // Serialization - define the named Serialization Object and assign the set of \n    // mappings that belong to that object.\n    //\n    ///////////////////////////////////////////////////////////////////////////////////////-->\n\t<NamedIndividual rdf:about="http://www.tkltd.org/serializations/DataMapping_Serialisation">\n\t\t<rdf:type rdf:resource="http://www.tkltd.org/ontologies/serialization#Serialization" />\n\t\t<ser:IsComponentOfSerializ

In [4]:
# Build the serialization for the sample model; the result is kept in `t`.
# The path is relative to the notebook's working directory (the same base the
# other cells use for "../src/..."), not to one user's home directory.
schemafilename = '../src/models/sample/sample_ser.json'

t = s_b.process_json_serialization(schemafilename)

[('subclasses', {'a': 'http://www.tkltd.org/ontologies/sample#SubClassA', 'b': 'http://www.tkltd.org/ontologies/sample#SubClassB', 'c': 'http://www.tkltd.org/ontologies/sample#SubClassC'}), ('testcontents', {'1': 'X', '2': 'Y', '3': 'Z'})]
('subclasses', {'a': 'http://www.tkltd.org/ontologies/sample#SubClassA', 'b': 'http://www.tkltd.org/ontologies/sample#SubClassB', 'c': 'http://www.tkltd.org/ontologies/sample#SubClassC'})
////////////////////
{'id': 'http://www.tkltd.org/serializations/Sample_Serialisation36f27a110dd64f84a8d48b5503e6b6d7', 'type': 'http://www.tkltd.org/ontologies/serialization#TranslationMapping', 'label': 'subclasses', 'kvpairs': [('http://www.tkltd.org/serializations/Sample_Serialisation9a2f316aa94649da9e9b3f1dad18928d', 'http://www.tkltd.org/ontologies/serialization#MappingKVPair', 'a', 'http://www.tkltd.org/ontologies/sample#SubClassA'), ('http://www.tkltd.org/serializations/Sample_Serialisationda382ff629dc4d0ea17d58cba5ca56a0', 'http://www.tkltd.org/ontologies/s

In [5]:
# A serialization config held inline as a dict, used here to look at the
# shape of its "translation_mappings" section.
j_data = {
    '$schema': 'serialisation_schema.json',
    'serialization_iri': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#Sample_Serialisation',
    'serialization_label': 'Sample Serialisation',
    'targetOntology': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample',
    'targetClasses': [
        'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someClass',
        'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#SubClassA',
        'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#SubClassB',
        'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#SubClassC',
    ],
    'targetProperties': [
        'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someProperty',
    ],
    'targetDataProperties': [
        'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someDataProperty',
    ],
    'targetStaticProperties': [
        'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
    ],
    'source_headers': ['ParentClass', 'Class', 'Property', 'DataProperty', 'SubClassPointer'],
    'translation_mappings': {
        'subclasses': {
            'key0': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#SubClassA',
            'key1': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#SubClassB',
            'key2': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#SubClassC',
        },
    },
    'serialization_mappings': [
        {
            'mapping_name': 'Parent_Class_Mapping',
            'label': 'ParentClass',
            'target': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someClass',
        },
        {
            'mapping_name': 'Class_Mapping',
            'label': 'Class',
            'target': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someClass',
            'parent_label': 'ParentClass',
        },
        {
            'mapping_name': 'SubClassProperty_Mapping',
            'domain': 'Class',
            'target': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
            'range': 'SubClassPointer',
            'translationMapping': 'subclasses',
        },
        {
            'mapping_name': 'Property_Mapping',
            'target': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someProperty',
            'domain': 'Class',
            'range': 'Property',
        },
        {
            'mapping_name': 'DataProperty_Mapping',
            'target': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#someDataProperty',
            'domain': 'Class',
            'range': 'DataProperty',
        },
    ],
}

# Print each translation table as "<name> <key-to-IRI dict>".
translation_tables = j_data.get("translation_mappings")
for table_name, table in translation_tables.items():
    print(table_name, table)

subclasses {'key0': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#SubClassA', 'key1': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#SubClassB', 'key2': 'http://www.semanticweb.org/tomk/ontologies/2023/6/sample#SubClassC'}


In [6]:
# Save the serialization held in `t` (assigned in a previous cell) and echo it.
# `t` is a single string (see the printed output), so it is written in one
# call instead of character by character through writelines(). The generated
# XML declares encoding utf-8, so the file is opened with that encoding
# rather than the platform default.
with open("../src/sample_ser.rdf", "w", encoding="utf-8") as f:
    f.write(t)

print(t)

<?xml version='1.0' encoding='utf-8'?>
<!--
        Sample Schema Name
        -->
<rdf:RDF xmlns="http://www.w3.org/2002/07/owl#" xml:base="http://www.w3.org/2002/07/owl" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:ser="http://www.tkltd.org/ontologies/serialization#">
	<!--   
    ///////////////////////////////////////////////////////////////////////////////////////
    //
    // Serialization - define the named Serialization Object and assign the set of 
    // mappings that belong to that object.
    //
    ///////////////////////////////////////////////////////////////////////////////////////-->
	<NamedIndividual rdf:about="http://www.tkltd.org/serializations/Sample_Serialisation">
		<rdf:type rdf:resource="http://www.tkltd.org/ontologies/serialization#Serialization" />
		<ser:IsComponentOfSerialization rdf:resource="http://www

In [4]:
from rdflib import URIRef, Literal, Graph, Namespace
# Expand the prefixed name "ser:test" into a full IRI by replacing the
# "<prefix>:" part of its N3 rendering with the namespace IRI.
# NOTE(review): this namespace IRI contains "/tomk/", unlike the xmlns:ser
# value shown in the generated RDF output - confirm which one is intended.
ns_tuple = ("ser", "http://www.tkltd.org/tomk/ontologies/serialization#")
prefix, namespace_iri = ns_tuple
URIRef("ser:test").n3().replace(f"{prefix}:", namespace_iri)

'<http://www.tkltd.org/tomk/ontologies/serialization#test>'

In [3]:
# Collect, for every label in the DMEAR serialization, the set of ranges that
# mappings attach to it as a domain.
# The path is relative to the notebook's working directory (the same base the
# other cells use for "../src/..."), not to one user's home directory.
schemafilename = '../src/models/DMEAR/DMEAR_ser.json'
with open(schemafilename, 'r', encoding='utf-8') as file:
    jdata = json.load(file)

label_d = dict()
for m in s_b.fetch_mapping_values(jdata):
    # The fourth item of each mapping tuple is indexed by 'ser:...' keys.
    attrs = m[3]
    try:
        label = attrs['ser:SerializationLabel']
    except KeyError:
        # No label on this mapping: treat it as a domain -> range mapping.
        # Only KeyError is caught, so unrelated failures are no longer hidden.
        domain = attrs['ser:MappingDomain']
        range_ = attrs['ser:MappingRange']
        print(domain, range_)
        label_d.setdefault(domain, set()).add(range_)
    else:
        # NOTE(review): this resets any ranges already collected under the
        # same key - confirm label mappings always come before the
        # domain/range mappings that refer to them.
        label_d[label] = set()
        print(label)
label_d.keys()

Namespace
Model
Class
Attribute
Domain
ParentDomain
Relationship
DomainEvent
FromClass
ToClass
FromAttribute
ToAttribute
Namespace Domain
Namespace Model
Namespace Class
Namespace Relationship
Class Attribute
Attribute Class
Domain Class
Class Domain
ParentDomain Domain
Domain ParentDomain
Domain DomainEvent
DomainEvent Domain
Domain DomainParticipant
DomainParticipant Domain
Model Class
Class Model
Relationship RelationshipLabel
Relationship RelationshipDescription
Model Relationship
Relationship Model
Relationship FromClass
Relationship ToClass
Relationship FromAttribute
Relationship ToAttribute
Relationship FromCardinality
Relationship ToCardinality
Model ModelType
Namespace NamespaceLabel
Namespace NamespaceDescription
Model ModelLabel
Model ModelDescription
Domain DomainLabel
Domain DomainDescription
DomainParticipant DomainParticipantLabel
DomainParticipant DomainParticipantDescription
DomainEvent DomainEventDescription
DomainEvent DomainEventLabel
DomainEvent DomainEventDescript

dict_keys(['Namespace', 'Model', 'Class', 'Attribute', 'Domain', 'ParentDomain', 'Relationship', 'DomainEvent', 'FromClass', 'ToClass', 'FromAttribute', 'ToAttribute', 'DomainParticipant'])

In [4]:
label_d

{'Namespace': {'Class',
  'Domain',
  'Model',
  'NamespaceDescription',
  'NamespaceLabel',
  'Relationship'},
 'Model': {'Class',
  'ModelDescription',
  'ModelLabel',
  'ModelType',
  'Relationship'},
 'Class': {'Attribute', 'ClassDescription', 'ClassLabel', 'Domain', 'Model'},
 'Attribute': {'AttributeDescription',
  'AttributeLabel',
  'Class',
  'DataType',
  'IsPK',
  'Nulls',
  'Sequence'},
 'Domain': {'Class',
  'DomainDescription',
  'DomainEvent',
  'DomainLabel',
  'DomainParticipant',
  'ParentDomain'},
 'ParentDomain': {'Domain'},
 'Relationship': {'FromAttribute',
  'FromCardinality',
  'FromClass',
  'Model',
  'RelationshipDescription',
  'RelationshipLabel',
  'ToAttribute',
  'ToCardinality',
  'ToClass'},
 'DomainEvent': {'Domain', 'DomainEventDescription', 'DomainEventLabel'},
 'FromClass': set(),
 'ToClass': set(),
 'FromAttribute': set(),
 'ToAttribute': set(),
 'DomainParticipant': {'Domain',
  'DomainParticipantDescription',
  'DomainParticipantLabel'}}

In [3]:
# Build the RDF/XML serialization for the DMCAR model and save it.
# The input path is relative to the notebook's working directory, like the
# output path below, so the cell does not depend on one user's home directory.
schemafilename = '../src/models/DMCAR/DMCAR_ser.json'

t = s_b.process_json_serialization(schemafilename)

# `t` is a single string (see the printed output of the earlier cells), so it
# is written in one call instead of character by character through
# writelines(). The generated XML declares encoding utf-8, so the file is
# opened with that encoding rather than the platform default.
with open("../src/models/DMCAR/DMCAR_ser.rdf", "w", encoding="utf-8") as f:
    f.write(t)

print(t)

[]
[<Element <function Comment at 0x7f40815212d0> at 0x7f4094392160>, <Element 'NamedIndividual' at 0x7f4094392250>, <Element <function Comment at 0x7f40815212d0> at 0x7f4094350a90>, <Element 'Class' at 0x7f40943530b0>, <Element 'Class' at 0x7f4094353100>, <Element 'Class' at 0x7f4094353150>, <Element 'Class' at 0x7f40943531a0>, <Element 'Class' at 0x7f40943531f0>, <Element 'Class' at 0x7f4094353240>, <Element 'Class' at 0x7f4094353290>, <Element 'Class' at 0x7f40943532e0>, <Element <function Comment at 0x7f40815212d0> at 0x7f4094353330>, <Element 'AnnotationProperty' at 0x7f40943536a0>, <Element 'AnnotationProperty' at 0x7f40943536f0>, <Element 'AnnotationProperty' at 0x7f4094353740>, <Element 'AnnotationProperty' at 0x7f4094353790>, <Element 'AnnotationProperty' at 0x7f40943537e0>, <Element 'AnnotationProperty' at 0x7f4094353830>, <Element 'AnnotationProperty' at 0x7f4094353880>, <Element 'AnnotationProperty' at 0x7f40943538d0>, <Element 'AnnotationProperty' at 0x7f4094353920>, <Elem