## Bridging combat messages spreadsheet to JSON 

In [276]:
from openpyxl.utils import get_column_letter
from openpyxl import load_workbook
from rdflib.namespace import XSD, RDF, OWL, RDFS
from rdflib import Graph, URIRef, Namespace, BNode, Literal
import owlrl
import json
import re
import shortuuid 

In [277]:
# Excel workbook holding the combat-message breakdown; opened with load_workbook in a later cell.
in_file = 'Bridging_Combat_Messages_Breakdown_v2.xlsx'
# Name for the Turtle output.
# NOTE(review): no visible cell reads out_file (the serialize call writes to
# 'test2.ttl') -- confirm which destination is intended.
out_file = 'Bridging2.ttl'

In [278]:
# namespaces
# Base IRI of the combat-message (cm) vocabulary, kept as a plain string.
cm = "http://purl.org/artiamas/cm/"
# rdflib Namespace over the same IRI: CM.agent, CM.time, ... build URIRefs under it.
CM = Namespace(cm)

In [279]:
# Open the input workbook and select its active worksheet; all message rows
# are read from `sheet` in the next cell.
file = load_workbook(filename = in_file)
sheet = file.active

In [280]:
# read the spreadsheet data into a JSON-like python structure:
# a list with one dict per data row, keyed by the header text found in row 1,
# plus a 1-based 'sequence' key giving the row's position among the data rows.
last_column = sheet.max_column
last_row = sheet.max_row

# Column A is deliberately skipped (min_col=2), as in the original cell.
# values_only=True yields plain cell values, so the header row is read once
# instead of once per data cell.
rows = sheet.iter_rows(min_row=1, max_row=last_row,
                       min_col=2, max_col=last_column,
                       values_only=True)
headers = next(rows, ())

all_data = []
for sequence, values in enumerate(rows, start=1):
    data = {'sequence': sequence}
    data.update(zip(headers, values))
    all_data.append(data)

In [281]:
# custom BNode factory: labels the blank node with a (possibly prefixed) short uuid
def bnode(prefix = ''):
    """Return a new rdflib BNode with a readable, random label.

    With a non-empty ``prefix`` the label is ``<prefix>_`` followed by the
    first 10 characters of a short uuid; otherwise it is just the first 8
    characters of a short uuid.
    """
    token = shortuuid.uuid()
    label = prefix + '_' + token[:10] if prefix else token[:8]
    return BNode(label)

In [282]:
# dictionary mapping a spreadsheet column name to a RDF property URI
# ('sequence' is not a spreadsheet column: it is the row counter added while
# reading the sheet). Each key appears exactly once; the original literal
# listed 'Agent' twice with the same value.
str2property = {
    'Time': CM.time,
    'Agent': CM.agent,
    'sequence': CM.sequence,
    'Type': CM.messageType,
    'Level': CM.level,
    'Grid': CM.grid,
    'Target': CM.target,
    'Alert Messages': CM.alertMessage,
    'id': CM.id,
}

In [283]:
# function mapping a property's spreadsheet value to the RDF object (node or literal) stored for it
def str2object(msg, prop, value):
    """Convert one cell value of message ``msg`` into an RDF node or literal.

    * ints and floats become plain literals, whatever the column;
    * an 'Agent' value is resolved to a unit node via parse_agent;
    * a 'Target' value containing " Task " labels the shared ``task`` node
      and returns it; otherwise it is resolved with parse_target when the
      message has an agent, or with parse_agent when it has none;
    * every other value is wrapped in a Literal.
    """
    if type(value) in (int, float):
        return Literal(value)
    if prop == 'Agent':
        return parse_agent(value)
    if prop != 'Target':
        return Literal(value)

    # From here on we are handling the Target column.
    if value and " Task " in value:
        # sometimes the Target field has the task
        g.add((task, RDFS.label, Literal(value)))
        return task
    if msg['Agent']:
        return parse_target(value, msg=msg)
    # No agent on this message: the target is parsed as if it were the agent.
    return parse_agent(value)

### Given a string (e.g., "B CO / 1 - 22") representing a military unit we've not seen before, we parse the string to infer the unit's type (e.g., CM.Company) and its superior units (e.g., Battalion 1; Regiment 22) and their types, adding this information to the RDF graph. The function returns the agent's URI.

In [284]:
def normalize_unit_name(text):
    """Return a canonical unit name with '/' between components.

    Example: "SCT PLT / 1 - 22 IN" -> "SCT_PLT/1/22_IN".

    * ``None``, empty or whitespace-only input yields ''.
    * A name without any '/' is taken to be just the local unit name and
      gets the default parent ' / 1 / 22 IN' appended.
    * '-' is treated as a component separator and spaces become '_'.
    * A leading "<digit> <digit> CO" (platoon number directly followed by
      the company number) is split into two components.
    """
    if not text:
        return ''
    name = text.strip()
    if not name:
        return ''
    if '/' not in name:
        # sometimes there's just the local name
        name = name + ' / 1 / 22 IN'
    name = name.replace('-', '/')
    name = name.replace(' ', '_')
    # Raw string: '\d' in a normal literal is an invalid escape sequence.
    if re.match(r'\d_\d_CO', name):
        name = name[0] + '/' + name[1:]
    # Drop the underscores left behind by the spaces around each separator.
    return '/'.join(part.strip('_') for part in name.split('/'))

def infer_unit_type(name):
    """Return the CM class of a unit, judged from its normalized name.

    Example: SCT_PLT/1/22_IN => CM.ScoutPlatoon

    The first name component is checked against known markers, most
    specific first (so 'ENG_CO' wins over the plain 'CO' substring). If the
    first component matches nothing but the second one contains 'CO', the
    unit is taken to be a platoon of that company. Anything else is
    reported on stdout and typed as a generic CM.MilitaryUnit.
    """
    parts = name.split('/')
    head = parts[0]

    # Order matters: the generic 'CO' marker must come last.
    markers = (
        ('ENG_CO', CM.EngineeringCompany),
        ('SCT_PLT', CM.ScoutPlatoon),
        ('MORTAR_PLT', CM.MortarPlatoon),
        ('CO', CM.Company),
    )
    for marker, unit_type in markers:
        if marker in head:
            return unit_type

    if len(parts) > 1 and 'CO' in parts[1]:
        return CM.Platoon

    print('Unrecognized unit type:', name)
    return CM.MilitaryUnit

def parse_target(text, field='target', infer_types=False, msg=None):
    """Return the graph node for the unit named in a message's Target field.

    Thin wrapper around parse_agent that always registers the unit with
    field='target' (the ``field`` argument is accepted for signature
    symmetry only). ``msg`` is forwarded so that parse_agent can tell
    whether the message has an agent; the original dropped it, which made
    parse_agent's CM.RED branch unreachable.
    """
    return parse_agent(text, field='target', infer_types=infer_types, msg=msg)

def parse_agent(text, field='agent', infer_types=False, msg=None):
    """Return the graph node for the military unit named by ``text``.

    The name is normalized first; a unit seen before is returned from the
    module-level ``instances`` cache unchanged. A new unit gets a fresh
    'UNIT' blank node which is cached and described in the graph ``g`` with
    its inferred rdf:type, an rdfs:label (the normalized name) and, where it
    can be decided, its force:

    * field == 'agent'                               -> CM.BLUE
    * field == 'target' and ``msg`` has an 'Agent'   -> CM.RED

    Args:
        text: raw unit string from the spreadsheet (may be None or empty).
        field: 'agent' or 'target'; selects the force rule above.
        infer_types: reserved for also adding the superior units
            (battalion, regiment) to the graph. Not implemented.
        msg: the message dict the unit came from, if known.

    Raises:
        NotImplementedError: if ``infer_types`` is true for a unit that is
            not cached yet. The previous code for this path referenced
            names that were never defined and failed with NameError after
            the unit had already been half-registered.
    """
    name = normalize_unit_name(text)
    if name in instances:
        # We've seen this before, so just return the instance
        return instances[name]

    if infer_types:
        # Fail before touching the cache or the graph.
        raise NotImplementedError(
            "parse_agent(infer_types=True): inferring superior units "
            "(battalion/regiment) is not implemented")

    unit_type = infer_unit_type(name)

    unit_id = bnode('UNIT')
    instances[name] = unit_id
    if field == 'agent':
        g.add((unit_id, CM.force, CM.BLUE))
    elif field == 'target' and msg and msg['Agent']:
        g.add((unit_id, CM.force, CM.RED))
    g.add((unit_id, RDF.type, unit_type))
    g.add((unit_id, RDFS.label, Literal(name)))

    return unit_id

In [285]:
def _add_nested_action(report, action_type, roles):
    """Create an 'ACT' node typed ``action_type``, hang it off ``report`` via
    CM.action and attach every (predicate, node) pair in ``roles`` to it."""
    nested = bnode('ACT')
    g.add((report, CM.action, nested))
    g.add((nested, RDF.type, action_type))
    for predicate, node in roles:
        g.add((nested, predicate, node))
    return nested


def add_action(msg, msg_id):
    """Add an action node describing what message ``msg_id`` reports.

    The action is a fresh 'ACT' blank node linked to the message through
    CM.message and carrying the message's time, sequence and lower-cased
    alert text. Its rdf:type and participants are chosen by matching
    phrases in the alert text; the branches are tried in order, so earlier
    phrases win. Status reports (unable / pause / wait / resume / end)
    additionally get a nested action describing what is being reported on.

    Args:
        msg: the row dict of the message (needs the "Alert Messages" key).
        msg_id: graph node of the message; its CM.agent, CM.target, CM.time
            and CM.sequence triples must already be in ``g``.

    An alert matching no known phrase (including an empty cell) is printed
    as "Unrecognized alert:" and only gets the common triples.
    """
    act = bnode('ACT')
    # An empty cell is read as None; treat it as an empty alert text.
    alert = (msg["Alert Messages"] or '').lower()
    agent = list(g.objects(msg_id, CM.agent))[0]
    target = list(g.objects(msg_id, CM.target))[0]
    #g.add((msg_id, CM.action, act))
    g.add((act, CM.message, msg_id))
    g.add((act, CM.time, list(g.objects(msg_id, CM.time))[0]))
    g.add((act, CM.sequence, list(g.objects(msg_id, CM.sequence))[0]))
    g.add((act, CM.alertMessage, Literal(alert)))

    # NOTE(review): CM.Status is capitalised unlike the other predicates
    # (CM.subject, CM.reason, ...) -- kept as is; confirm against the ontology.
    if 'resupply' in alert:
        g.add((act, RDF.type, CM.Resupply))
        g.add((act, CM.recipient, target))
        if "(ammo)" in alert:
            g.add((act, CM.object, CM.AMMO))
        elif "(fuel)" in alert:
            g.add((act, CM.object, CM.FUEL))
    elif 'earned sa' in alert:
        g.add((act, RDF.type, CM.EarnedSA))
        g.add((act, CM.subject, agent))
        g.add((act, CM.object, target))
    elif 'moving to fight' in alert:
        g.add((act, RDF.type, CM.Move))
        g.add((act, CM.subject, agent))
        g.add((act, CM.toward, target))
        g.add((act, CM.reason, Literal("attack")))
    elif re.match('attacking.*against', alert):
        g.add((act, RDF.type, CM.Attack))
        g.add((act, CM.subject, agent))
        g.add((act, CM.object, target))
    elif 'fighting' in alert:
        g.add((act, RDF.type, CM.StartFight))
        g.add((act, CM.subject, agent))
        g.add((act, CM.toward, target))
    elif re.match('attacking.*ended', alert):
        g.add((act, RDF.type, CM.EndFight))
        g.add((act, CM.subject, agent))
        g.add((act, CM.toward, target))
    elif re.match('receiving.*fire$', alert):
        # The reporting unit is the one being attacked.
        g.add((act, RDF.type, CM.Attack))
        g.add((act, CM.object, agent))
    elif re.match('receiving.*fire ended', alert):
        g.add((act, RDF.type, CM.EndAttack))
        g.add((act, CM.object, agent))
    elif "not going after opfor" in alert:
        g.add((act, RDF.type, CM.Report))
        g.add((act, CM.subject, agent))
        g.add((act, CM.Status, CM.UNABLE))
        g.add((act, CM.reason, Literal(alert)))
        _add_nested_action(act, CM.Engage,
                           [(CM.subject, agent), (CM.object, target)])
    elif "paused at crossing control point" in alert:
        g.add((act, RDF.type, CM.Report))
        # Every other Report records its subject; this branch used to omit it.
        g.add((act, CM.subject, agent))
        g.add((act, CM.Status, CM.PAUSE))
        if "crossing is not traversable" in alert:
            g.add((act, CM.reason, Literal("Crossing not traversable")))
        # CM.Move, as in the other branches (was the inconsistent CM.MOVE).
        _add_nested_action(act, CM.Move,
                           [(CM.subject, agent), (CM.object, target)])
    elif 'task' in alert:
        # `alert` is already lower-cased, so this also covers "Task".
        g.add((act, RDF.type, CM.TaskReport))
        g.add((act, CM.subject, agent))
    elif 'waiting' in alert:
        g.add((act, RDF.type, CM.Report))
        g.add((act, CM.subject, agent))
        g.add((act, CM.Status, CM.WAIT))
        if "can't attack without reinforcements" in alert:
            g.add((act, CM.reason, Literal("need reinforcements")))
        _add_nested_action(act, CM.Attack,
                           [(CM.subject, agent), (CM.object, target)])
    elif 'resume' in alert:
        g.add((act, RDF.type, CM.Report))
        g.add((act, CM.subject, agent))
        g.add((act, CM.Status, CM.RESUME))
        _add_nested_action(act, CM.Move,
                           [(CM.subject, agent), (CM.object, target)])
    elif 'planned battle removed' in alert:
        g.add((act, RDF.type, CM.Report))
        g.add((act, CM.subject, agent))
        g.add((act, CM.Status, CM.END))
        if 'no real targets' in alert:
            g.add((act, CM.reason, Literal("no targets")))
        _add_nested_action(act, CM.Attack,
                           [(CM.subject, agent), (CM.object, target)])
    elif 'firing has stopped' in alert:
        g.add((act, RDF.type, CM.Report))
        g.add((act, CM.subject, agent))
        g.add((act, CM.Status, CM.END))
        # The attack that ended was directed at the reporting unit.
        _add_nested_action(act, CM.Attack, [(CM.object, agent)])
    else:
        print("Unrecognized alert:", alert)

In [286]:
# Create the initial RDF graph
g = Graph()
# Same IRI as the `cm` string defined in the namespaces cell.
CM = Namespace('http://purl.org/artiamas/cm/')
# Register prefixes so the Turtle output uses cm:, owl: and rdf:.
g.bind("cm", CM, override=True)
g.bind("owl", OWL, override=True)
g.bind("rdf", RDF, override=True)
instances = {'':CM.NONE} # dict of names to bnodes; the empty name maps to CM.NONE
# Single task node that every message is attached to.
task = bnode('TASK')
g.add((task, RDF.type, CM.CrossingTask))

<Graph identifier=N9f1445134c6b4e0c92d273408f53397d (<class 'rdflib.graph.Graph'>)>

In [287]:
# read cm ontology into another graph
gcm = Graph()
gcm.parse("http://purl.org/artiamas/cm", format='ttl')

<Graph identifier=N905be3734ec8471daf0aa192bec2bd98 (<class 'rdflib.graph.Graph'>)>

In [288]:
# Show the message graph as Turtle; at this point it holds only the CrossingTask node.
print(g.serialize(format='ttl'))

@prefix cm: <http://purl.org/artiamas/cm/> .

[] a cm:CrossingTask .




### Add message objects to the graph

In [289]:
# One 'MSG' node per spreadsheet row, in row order.
messages = []
actions = []
for msg in all_data:
    subj = bnode('MSG')
    messages.append(subj)
    g.add((subj, RDF.type, CM.CombatMessage))
    g.add((subj, CM.task, task))
    for prop, obj in msg.items():
        if prop not in str2property:
            print(f"Unrecognized property {prop}")
            continue
        obj = str2object(msg, prop, obj)
        prop = str2property[prop]
        g.add((subj, prop, obj))
    # Needs the agent/target/time/sequence triples added just above.
    add_action(msg, subj)

# Link the task to the ends of the message sequence (nothing to link when
# the sheet had no data rows).
if messages:
    g.add((task, CM.firstMessage, messages[0]))
    g.add((task, CM.lastMessage, messages[-1]))

# Chain every message to its successor; the last message has none.
# (The original pointed every message at messages[1+1], i.e. the third one.)
for current, following in zip(messages, messages[1:]):
    g.add((current, CM.nextMessage, following))

### compute deductive closure using owl-rl

In [290]:
# owlrl.DeductiveClosureDeductiveClosure(OWLRL_Extension, rdfs_closure = True, axiomatic_triples = True, datatype_axioms = True).expand(graph), rdfs_closure = True, axiomatic_triples = True, datatype_axioms = True).expand(g)

In [291]:
# Union of the message graph and the cm ontology, then expanded with owlrl's
# RDFS semantics (the serialized g2 in a later cell shows the added
# rdfs:Resource / rdfs:Class typings).
g2 = g + gcm
# NOTE(review): the three positional True flags presumably switch on axiomatic
# triples, datatype axioms and the RDFS closure -- confirm against the owlrl API.
rdfs = owlrl.RDFSClosure.RDFS_Semantics(g2, True, True, True)
rdfs.closure()
rdfs.flush_stored_triples()

In [292]:
#g2 = g + gcm
#superTypeQuery = "select ?X ?ST {?X rdf:type/rdfs:subClassOf* ?ST}"
#qres = g2.query(superTypeQuery)

In [293]:
#for row in qres:
#    print(f"{row.X} a {row.ST}")
#    if row.ST in [OWL.Class, OWL.Restriction, CM.NONE, None] or row.X == CM.MilitaryUnit:
#          print(f"skipping {row.X} a {row.ST}")
#    else:
#        g2.add((row.X, RDF.type, row.ST))

In [294]:
# Write the expanded graph to disk as Turtle.
# NOTE(review): the destination is the literal 'test2.ttl', not `out_file`
# ('Bridging2.ttl') defined at the top -- confirm which one is intended.
g2.serialize(format='ttl', destination='test2.ttl')

<Graph identifier=Nac114ecd4d6349ea958631c9c92acb2c (<class 'rdflib.graph.Graph'>)>

In [295]:
# Dump the expanded graph (messages + ontology + inferred triples) as Turtle for inspection.
print(g2.serialize(format='ttl'))

@prefix : <http://purl.org/artiamas/cm/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix wd: <http://www.wikidata.org/wiki/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

_:n0fee6e0577b2425f902269248c063c73b1 a rdfs:Class,
        rdfs:Resource,
        owl:Class ;
    rdfs:subClassOf _:n0fee6e0577b2425f902269248c063c73b1,
        rdfs:Resource ;
    owl:unionOf [ a rdf:List,
                rdfs:Resource ;
            rdf:first :Action ;
            rdf:rest [ a rdf:List,
                        rdfs:Resource ;
                    rdf:first :Message ;
                    rdf:rest () ] ] .

_:n0fee6e0577b2425f902269248c063c73b10 a rdfs:Class,
        rdfs:Resource,
        owl:Restriction ;
    rdfs:subClassOf _:n0fee6e0577b2425f902269248c063c73b10,
        rdfs:Resource ;
    owl:onProperty :unitOf ;
    owl:someValuesFrom :RegimentOrBrigade .

_:n0fee

In [296]:
# List every normalized unit name registered so far (the first entry is the
# empty name that maps to CM.NONE, hence the blank first line of output).
for name in instances:
    print(name)


SCT_PLT/1/22_IN
1/1_CO_(TRK)/1_BN/241_INF_BDE_(TM)
C_CO/1/22_IN
50_ENG_CO_(MRBC)/1/22_IN
A_CO/1/22_IN
D_CO/1/22_IN
MORTAR_PLT/1/22_IN
B_CO/1/22_IN
3/1_CO_(TRK)/1_BN/241_INF_BDE_(TM)
2/1_CO_(TRK)/1_BN/241_INF_BDE_(TM)


In [297]:
# Sanity check: list every (X, ST) pair where ST is X's rdf:type or any
# superclass of it, and flag rows whose subject is the class CM.MilitaryUnit
# itself rather than an instance.
q = "select ?X ?ST {?X rdf:type/rdfs:subClassOf* ?ST }"
qres = g2.query(q)
for row in qres:
    if row.X == CM.MilitaryUnit:
        print('BAD ROW:', row)
    #print(f"{row.X} a {row.ST}")

BAD ROW: (rdflib.term.URIRef('http://purl.org/artiamas/cm/MilitaryUnit'), rdflib.term.URIRef('http://www.w3.org/2002/07/owl#Class'))
BAD ROW: (rdflib.term.URIRef('http://purl.org/artiamas/cm/MilitaryUnit'), rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#Resource'))
BAD ROW: (rdflib.term.URIRef('http://purl.org/artiamas/cm/MilitaryUnit'), rdflib.term.URIRef('http://www.w3.org/2002/07/owl#Class'))
BAD ROW: (rdflib.term.URIRef('http://purl.org/artiamas/cm/MilitaryUnit'), rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#Class'))
BAD ROW: (rdflib.term.URIRef('http://purl.org/artiamas/cm/MilitaryUnit'), rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#Class'))
BAD ROW: (rdflib.term.URIRef('http://purl.org/artiamas/cm/MilitaryUnit'), rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#Resource'))
BAD ROW: (rdflib.term.URIRef('http://purl.org/artiamas/cm/MilitaryUnit'), rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#Resource'))
BAD ROW: (rdflib.term.URI