# Comparing versions of the CIDOC-CRM

This script compares versions 7.1.1 and 7.1.2 of the CIDOC-CRM to check exactly what has changed. It creates JSON files describing the differences, to be imported into OntoME.

### Library imports

In [33]:
import json
from collections import Counter
from itertools import chain

import lxml.etree as et
import numpy as np

### XML imports

In [3]:
# Paths of the two CIDOC-CRM XML exports being compared.
xml_711 = "input/cidoc_crm_v7.1.1.xml"
xml_712 = "input/cidoc_crm_v7.1.2.xml"

# Hand the paths straight to lxml instead of a text-mode file object:
# the parser then reads the raw bytes and honours the encoding given in
# the XML declaration, rather than whatever the platform's default text
# encoding happens to be.
root_711 = et.parse(xml_711).getroot()
root_712 = et.parse(xml_712).getroot()

### Counting classes and properties

... to check that none were added or removed.

In [4]:
# Print, for each kind of element, how many of them each version contains.
# The label keeps its leading space because print() already inserts one
# separator between its arguments.
for tag, plural in (("class", "classes"), ("property", "properties")):
    for version, root in (("7.1.1", root_711), ("7.1.2", root_712)):
        found = root.findall(f".//{tag}")
        print(len(found), f" {plural} in {version}.")

81  classes in 7.1.1.
81  classes in 7.1.2.
160  properties in 7.1.1.
160  properties in 7.1.2.


### Comparing classes

In [34]:
def _child_text(element, tag):
    """Return the text of the first ``tag`` child of ``element``.

    Returns None when the child is absent (or has no text), so optional
    and mandatory children can be compared the same way.
    """
    child = element.find(tag)
    return child.text if child is not None else None


def _child_id(element, tag):
    """Return the ``id`` attribute of the first ``tag`` child, or None if absent."""
    child = element.find(tag)
    return child.get('id') if child is not None else None


# Children whose text content is compared between the two versions.
CLASS_TEXT_FIELDS = (
    'className',
    'examples',
    'fullName',
    'inFirstOrderLogic',
    'scopeNote',
)

# Index the 7.1.2 classes by id once, so that each 7.1.1 class is matched
# with a single lookup instead of a full scan of the 7.1.2 tree.
classes_712 = {cl.get('id'): cl for cl in root_712.findall('.//class')}

# Maps a class id to a dict {element name: value found in 7.1.2} holding
# only the elements that differ from 7.1.1.
changed_classes = {}

for classe in root_711.findall('.//class'):

    ident = classe.get('id')
    cl712 = classes_712.get(ident)

    # A class missing from 7.1.2 is reported and skipped. (Looping until
    # a match is found would never terminate in that case.)
    if cl712 is None:
        print(f"Class {ident} has no equivalent in version 7.1.2.")
        continue

    # The class change dictionary.
    changes = {}

    for field in CLASS_TEXT_FIELDS:
        new_text = _child_text(cl712, field)
        if new_text != _child_text(classe, field):
            changes[field] = new_text

    # subClassOf is optional and is compared on its ``id`` attribute.
    new_parent = _child_id(cl712, 'subClassOf')
    if new_parent != _child_id(classe, 'subClassOf'):
        changes['subClassOf'] = new_parent

    if changes:
        changed_classes[ident] = changes

print(f'There are {len(changed_classes)} classes to change.')

There are 29 classes to change.


### Comparing properties

In [37]:
def _child_text(element, tag):
    """Return the text of the first ``tag`` child of ``element``.

    Returns None when the child is absent (or has no text), so optional
    and mandatory children can be compared the same way.
    """
    child = element.find(tag)
    return child.text if child is not None else None


def _child_id(element, tag):
    """Return the ``id`` attribute of the first ``tag`` child, or None if absent."""
    child = element.find(tag)
    return child.get('id') if child is not None else None


def _child_ids(element, tag):
    """Return the ``id`` attribute of every ``tag`` child (empty list if none)."""
    return [child.get('id') for child in element.findall(tag)]


# Children whose text content is compared between the two versions.
# 'examples' and 'propertyOfProperty' are optional; a child that exists in
# only one of the two versions is treated as a change (None = absent).
PROPERTY_TEXT_FIELDS = (
    'directName',
    'examples',
    'fullName',
    'scopeNote',
    'propertyOfProperty',
    'quantification',
)

# Index the 7.1.2 properties by id once, so that each 7.1.1 property is
# matched with a single lookup instead of a full scan of the 7.1.2 tree.
props_712 = {p.get('id'): p for p in root_712.findall('.//property')}

# Maps a property id to a dict {element name: value found in 7.1.2} holding
# only the elements that differ from 7.1.1.
changed_props = {}

for prop in root_711.findall('.//property'):

    ident = prop.get('id')
    p712 = props_712.get(ident)

    # A property missing from 7.1.2 is reported and skipped. (Looping
    # until a match is found would never terminate in that case.)
    if p712 is None:
        print(f"Property {ident} has no equivalent in version 7.1.2.")
        continue

    changes = {}

    for field in PROPERTY_TEXT_FIELDS:
        new_text = _child_text(p712, field)
        if new_text != _child_text(prop, field):
            changes[field] = new_text

    # subPropertyOf may be absent, present once, or repeated. Compare the
    # two id collections as sets so that removed super-properties are
    # detected as well as added ones, regardless of element order.
    old_supers = _child_ids(prop, 'subPropertyOf')
    new_supers = _child_ids(p712, 'subPropertyOf')
    if set(new_supers) != set(old_supers):
        # None (rather than an empty list) records that 7.1.2 has no
        # subPropertyOf element at all.
        changes['subPropertyOf'] = new_supers or None

    # range and domain are optional and compared on their ``id`` attribute.
    new_range = _child_id(p712, 'range')
    if new_range != _child_id(prop, 'range'):
        changes['range'] = new_range

    new_domain = _child_id(p712, 'domain')
    if new_domain != _child_id(prop, 'domain'):
        changes['domain'] = new_domain

    if changes:
        changed_props[ident] = changes

print(f'There are {len(changed_props)} properties to change.')

There are 70 properties to change.


### Showing exactly what kind of information will need changing

In [None]:
# Flatten the per-item change dictionaries into plain lists of element
# names, one entry per changed element. Iterating a dict yields its keys,
# so chaining the inner dicts gives every changed element name in order.
classvalues = list(chain.from_iterable(changed_classes.values()))
propvalues = list(chain.from_iterable(changed_props.values()))

In [28]:
# Count how many times each element name appears among the changes.
# Counter does this in a single pass and keeps plain ``str`` keys, so the
# printed dictionaries read the same whatever the NumPy version; sorting
# the items keeps the alphabetical key order.
classprint = dict(sorted(Counter(classvalues).items()))
propprint = dict(sorted(Counter(propvalues).items()))

# The labels are French for "To change in the classes / properties".
print("À changer dans les classes : ", classprint)
print("À changer dans les propriétés : ", propprint)

À changer dans les classes :  {'examples': 23, 'scopeNote': 15}
À changer dans les propriétés :  {'examples': 35, 'fullName': 1, 'range': 2, 'scopeNote': 45, 'subPropertyOf': 2}


### Making final files

In [32]:
def _select_field(changes_by_id, field):
    """Return {id: new value} for every entry whose changes include ``field``."""
    return {
        ident: changes[field]
        for ident, changes in changes_by_id.items()
        if field in changes
    }


# One dictionary per (kind of item, changed element) pair to export.
classes_examples = _select_field(changed_classes, 'examples')
classes_scopeNote = _select_field(changed_classes, 'scopeNote')
properties_examples = _select_field(changed_props, 'examples')
properties_scopeNote = _select_field(changed_props, 'scopeNote')

outputs = {
    'output/classes_examples.json': classes_examples,
    'output/classes_scopeNote.json': classes_scopeNote,
    'output/properties_examples.json': properties_examples,
    'output/properties_scopeNote.json': properties_scopeNote,
}

for path, payload in outputs.items():
    # ensure_ascii=False writes non-ASCII characters as-is, so the file
    # encoding is fixed to UTF-8 instead of being left to the platform
    # default (which may be unable to encode them).
    with open(path, 'w', encoding='utf-8') as outfile:
        json.dump(payload, outfile, ensure_ascii=False)