## Bridging combat messages spreadsheet to a knowledge graph 

In [22]:
from openpyxl.utils import get_column_letter
from openpyxl import load_workbook
from rdflib.namespace import XSD, RDF, OWL, RDFS
from rdflib import Graph, URIRef, Namespace, BNode, Literal
#import json
import re
import shortuuid 
from collections import defaultdict

### Some global variables

In [23]:
#in_file = 'Bridging_Combat_Messages_Breakdown_v2.xlsx'
in_file = 'Bridging_v3.1.xlsx'
out_file = 'sim_v3.nt'

# keeps track of messages we don't understand
unrecognized_alerts = defaultdict(int)

# namespaces
cm = "http://purl.org/artiamas/cm/"
CM = Namespace(cm)

# objects seen in Target field
objname2IRI = {}

def getObj(label):
    """ returns IRI for task or object found in the Target field, creating it if neccessary """
    if label in objname2IRI:
        return objname2IRI[label]
    # not seen before, so create it
    if "Crossing Task" in label:
        obj = bnode('TASK')
        g.add((obj, RDF.type, CM.CrossingTask))
        g.add((simulation, CM.task, obj))
    elif "River Terrain" in label:
        obj = bnode('OBJ')
        g.add((obj, RDF.type, CM.Obstacle))
    else:
        print('Unrecognized object:', label)
        objname2IRI[label] = None
        return None
    g.add((obj, RDFS.label, Literal(label)))
    objname2IRI[label] = obj
    return obj
   

In [24]:
# custom BBode function adds a prefix to a short uuid sequence
def bnode(prefix = ''):
    if prefix:
        return BNode(prefix + '_' + shortuuid.uuid()[:5])
    else:
        return BNode(shortuuid.uuid()[:5])

### Create inital RDF graph 

In [25]:
# Create the inital RDF graph 
g = Graph()
CM = Namespace('http://purl.org/artiamas/cm/')
g.bind("cm", CM, override=True)
g.bind("owl", OWL)
g.bind("rdf", RDF)
g.bind('rdfs', RDFS)

simulation = bnode('SIM')
g.add((simulation, RDF.type, CM.Simulation))

task = bnode('TASK')
g.add((task, RDF.type, CM.CrossingTask))
g.add((simulation, CM.task, task))

instances = {'':CM.NONE} # dict of names to bnodes

In [26]:
file = load_workbook(filename = in_file)
sheet = file.active

In [27]:
# read the spreadsheet data into a JSON-like python structure
all_data = []

last_column = len(list(sheet.columns))
last_row = len(list(sheet.rows))

print(f"sheet has {len(list(sheet.columns))} columns and {len(list(sheet.rows))} rows")

for row in range(2, last_row + 1):
    data = {}
    data['sequence'] = row-1
    for column in range(1, last_column + 1):
        col = get_column_letter(column)
        property = sheet[col + str(1)].value
        value = sheet[col + str(row)].value
        if row > 1:
            data[property] = value
    all_data.append(data)

sheet has 8 columns and 127 rows


In [28]:
# dictionary mapping a spreadsheet column name to a RDF property URI
str2property = {'Time':CM.time, 'Agent':CM.agent, 'sequence':CM.sequence, 
                'Type':CM.messageType, 'Level':CM.level, 'Grid':CM.grid,
                'Agent':CM.agent, 'Target':CM.target, 'Alert Messages':CM.alertMessage,
                'Force':CM.force, 'id':CM.id}

In [29]:
# function mapping a message's prop value to a RDF property URI
def str2object(msg, prop, value):
    """ convert a string into a object or literal """
    if prop == 'Force':
        if value in ["B", "BLUFOR"]:
            return CM.BLUE
        elif value in ["R", "REDFOR"]:
            return CM.RED
        else:
            print('Unrecognized force value:', value)
            return CM.NONE
    elif type(value) in [int, float]:
        return Literal(value)
    elif prop == 'Agent':
        #print('msg:', msg)
        return parse_agent(value, msg=msg)
    elif prop == 'Target':
        if value and "[1]" in value:
            # sometimes the Target field has an object reference (e.g., task or obstacle)
            return getObj(value)
        elif not msg['Agent']:
            return parse_agent(value, msg=msg)
        else:
            return parse_target(value, msg=msg)
    else:
        return Literal(value)
    
def forceIRI(value):
    # 
    if value in ["B", "BLUFOR"]:
        return CM.BLUE
    elif value in ["R", "REDFOR"]:
        return CM.RED
    else:
        print('Unrecognized force value:', value)
        return CM.NONE

### Given a string (e.g., "B CO / 1 - 22") representing a military unit we've not seen before, we parse the string to infer the unit's type (e.g., CM.Company) and its superior units (e.g., Battalion 1; Regiment 22) nd their types, adding information to the RDF graph. The function returns the sgent's URI

In [30]:
def normalize_unit_name(text):
    """ returns a name with a slash between components,
     e.g., SCT_PLT/1/22_IN """
    if not text:
        return ''
    name = text.strip()
    if not name:
        return ''
    if '/' not in name:
        # sometimes there's just the local name
        name = name + ' / 1 / 22 IN'
    name = name.replace('-','/')
    name = name.replace(' ','_')
    if re.search('^\d_\d_CO', name):
        name = name[0] + '/' + name[1:]
    unit = [x.strip('_') for x in name.split('/')]
    name = '/'.join(unit)
    return name

def infer_unit_type_and_parent(name):
    """ returns unit type based on its name, e.g.; SCT_PLT/1/22_IN  => CM.ScoutPlatoon 
    standard hierarchy: squad<platoon<company<battalion<brigade|regiment<division<corps
    """
    units = name.split('/')
    unit0 = units[0]
    parent = '/'.join(units[1:])
    if 'ENG_CO' in unit0:
        unit_type = CM.EngineeringCompany
    elif 'SCT_PLT' in unit0:
        unit_type = CM.ScoutPlatoon
    elif 'MORTAR_PLT' in unit0:
        unit_type = CM.MortarPlatoon
    elif 'CO' in unit0:
        unit_type = CM.Company
    elif len(units) > 1 and 'CO' in units[1]:
        unit_type = CM.Platoon
    else:
        print('Unrecognized unit type:', name)
        unit_type = CM.MilitaryUnit
    return (unit_type, parent)

def parse_target(text, field='target', infer_units=True, msg=None):
    return parse_agent(text, field='target', infer_units=infer_units, msg=msg)

def parse_agent(text, field='agent', infer_units=True, msg=None):
    """returns entity iri id """
    global instances

    name = normalize_unit_name(text)
    if name in instances:
        # We've seen this before, so just return the instance
        return instances[name]
    
    unit_type, parent = infer_unit_type_and_parent(name)
      
    id = URIRef(cm + 'UNIT_'+ name)
    instances[name] = id
    if field == 'agent':
        # agent's force is same as msg force
        if msg['Force'] in ['B', 'BLUFOR']:
            force = CM.BLUE
        elif msg['Force'] in ['R', 'REDFOR']:
            force = CM.RED
        g.add((id, CM.force, force))
    elif field == 'target' and msg and msg['Agent']:
        # target's force (if there is an agent) opposite of msg force
        if msg['Force'] in ['B', 'BLUFOR']:
            force = CM.RED
        elif msg['Force'] in ['R', 'REDFOR']:
            force = CM.BLUE
        g.add((id, CM.force, force))
    g.add((id, CM.isa, unit_type))
    g.add((id, RDFS.label, Literal(name)))
    
    if infer_units:
        infer_superunits(id, unit_type, force, parent)
    return id
  
def get_unit_type(t):
    if 'Platoon' in t:
        return 'platoon'
    elif 'Company' in t:
        return 'company'
    elif 'Brigade' in t:
        return 'brigade'
    elif 'Battalion' in t:
        return 'battalion'
    elif 'Regiment' in t:
        return 'regiment'
    else:
        print('Unrecognized unit type:', t)
        return None

def infer_superunits(subid, subunit_type, force, name):
    #print(f"calling infer_superunits({subid}, {subunit_type}, {force}, {name})")
    if not name:
        return None
    id = URIRef(cm + 'UNIT_'+ name)
    instances[name] = id
    g.add((id, CM.force, force))
    g.add((id, RDFS.label, Literal(name)))
    g.add((subid, CM.partOf, id))
    
    units = name.split('/')
    unit0 = units[0]
    parent = '/'.join(units[1:])
    
    subtype = get_unit_type(subunit_type)
    
    if subtype == "platoon":
        # platoon can be part of a company or battalion
        if 'CO' in unit0:
            unit_type = CM.Company
        else:
            unit_type = CM.Battalion
        g.add((id, CM.isa, unit_type))
    elif subtype == "company":
        # company always part of a battalion
        unit_type = CM.Battalion
        g.add((id, CM.isa, unit_type))
    elif subtype == "battalion":
        # battalion can be part of a brigade or regiment
        if "INF BDE" in unit0:
            unit_type = CM.InfantryBrigade
        elif "BDE" in unit0:
            unit_type = CM.Brigade
        elif "IN" in unit0:
            unit_type = CM.InfantryRegiment
        else:
            unit_type = CM.Regiment
        g.add((id, CM.isa, unit_type))
    else:
        print(f"Unrecognized subunit type {unit0} for ({subunit_type}, {force}, {name}")
        return None
    
    if parent:
        infer_superunits(id, unit_type, force, parent)

def duration(time1, time2):
    # won't work if we cross a day boundry :-( 
    # H+08:12 
    h1,m1  = time1.split('+')[1].split(':')
    h2,m2  = time2.split('+')[1].split(':')
    return str(h2-h1) + ':' + str(m2-m1)
    
def time_to_minutes(time):
    h, m  = [int(x) for x in time.split('+')[1].split(':')]
    return 60*h + m
        

In [31]:
def add_action(msg, msg_id):
    """ add an action to the message"""
    act = bnode('ACT')
    alert = msg["Alert Messages"].lower()
    agent = list(g.objects(msg_id, CM.agent))[0]
    target = list(g.objects(msg_id, CM.target))[0]
    time = list(g.objects(msg_id, CM.time))[0]

    g.add((act, CM.message, msg_id))
    g.add((act, CM.time, list(g.objects(msg_id, CM.time))[0] ))
    g.add((act, CM.minutes, list(g.objects(msg_id, CM.minutes))[0] ))
    g.add((act, CM.sequence, list(g.objects(msg_id, CM.sequence))[0] ))
    g.add((act, CM.alertMessage, Literal(alert)))
    
    
    if 'resupply' in alert:
        g.add((act, CM.isa, CM.Resupply))
        g.add((act, CM.recipient, target))
        if "(ammo)" in alert:
            g.add((act, CM.object, CM.AMMO))
        elif "(fuel)" in alert:
            g.add((act, CM.object, CM.FUEL))
    elif ' sa ' in alert:
        actType = CM.EarnedSA if 'earned' in alert else CM.LostSA
        g.add((act, CM.isa, act))
        g.add((act, CM.subject, agent))
        g.add((act, CM.object, target))
    elif 'moving into range' in alert:
        reason = Literal(alert[15:].strip(', '))
        g.add((act, CM.isa, CM.Move))
        g.add((act, CM.subject, agent))
        g.add((act, CM.object, target))
        g.add((act, CM.reason, reason))
    elif 'in df range' in alert:
        actType = CM.Attack if '(attacking)' in alert else CM.Engage
        g.add((act, CM.isa, actType))
        g.add((act, CM.subject, agent))
        g.add((act, CM.object, target))
    elif 'moving to fight' in alert or \
         'adjust route to fight' in alert:
        g.add((act, CM.isa, CM.MoveToward))
        g.add((act, CM.subject, agent))
        g.add((act, CM.object, target))
        g.add((act, CM.reason, Literal("attack")))
    elif 'firing' in alert:
        actType = CM.FiringOn if ' on' in alert else CM.FiringEnded
        g.add((act, CM.isa, act))
        g.add((act, CM.subject, agent))
        g.add((act, CM.object, target))
    elif re.match('attacking.*against', alert):
        g.add((act, CM.isa, CM.Attack))
        g.add((act, CM.subject, agent))
        g.add((act, CM.object, target))
    elif 'fighting' in alert:
        g.add((act, CM.isa, CM.StartFight))
        g.add((act, CM.subject, agent))
        g.add((act, CM.toward, target))
    elif re.match('attacking.*ended', alert):
        g.add((act, CM.isa, CM.EndFight))
        g.add((act, CM.subject, agent))
        g.add((act, CM.toward, target))
    elif re.match('receiving.*fire$', alert):
        g.add((act, CM.isa, CM.Attack))
        g.add((act, CM.object, agent))
    elif re.match('receiving.*fire ended', alert):
        g.add((act, CM.isa, CM.EndAttack))
        g.add((act, CM.object, agent))
    elif "not going after opfor" in alert:
        g.add((act, CM.isa, CM.Report))
        g.add((act, CM.subject, agent))
        g.add((act, CM.status, Literal("unable")))
        g.add((act, CM.reason, Literal(alert)))
        act1 = bnode('ACT')
        g.add((act, CM.action, act1 ))
        g.add((act1, CM.isa, CM.Engage))
        g.add((act1, CM.subject, agent))
        g.add((act1, CM.object, target))
    elif "can't create directfires" in alert:
        g.add((act, CM.isa, CM.Report))
        g.add((act, CM.subject, agent))
        g.add((act, CM.status, Literal(alert)))
    elif "paused at crossing control point" in alert:
        g.add((act, CM.isa, CM.Report))
        g.add((act, CM.status, Literal("pause" )))
        if "crossing is not traversable" in alert:
            g.add((act, CM.reason, Literal("crossing not traversable")))
        act1 = bnode('ACT')
        g.add((act, CM.action, act1 ))
        g.add((act1, CM.isa, CM.MOVE))
        g.add((act1, CM.subject, agent))
        g.add((act1, CM.object, target))
    elif 'crossing begin' in alert:
        act = bnode('ACT')
        g.add((act, CM.isa, CM.Crossing))
        g.add((act, CM.subj, agent))
        g.add((act, CM.object, target))
        g.add((act, CM.startTime, time))
    elif 'crossing completed' in alert:
        # find corresponding act?
        act = bnode('ACT')
        g.add((act, CM.isa, CM.Crossing))
        g.add((act, CM.subj, agent))
        g.add((act, CM.object, target))
        g.add((act, CM.endTime, time))
    elif 'task' in alert or 'crossing' in alert:
        g.add((act, CM.isa, CM.TaskReport))
        g.add((act, CM.subject, agent))
        g.add((act, CM.object, task))
        if 'start' in alert or 'begin' in alert:
            g.add((act, CM.status, Literal("started")))
            g.add((task, CM.startTime, time))
        elif 'complete' in alert:
            g.add((act, CM.status, Literal("completed")))
            g.add((act, CM.status, Literal("ended")))
            g.add((task, CM.status, Literal("completed")))
            g.add((task, CM.endTime, time))
    elif 'pause' in alert and 'obstacle' in alert:
        g.add((act, CM.isa, CM.Report))
        g.add((act, CM.status, Literal("pause")))
        g.add((act, CM.reason, target))
        act1 = bnode('ACT')
        g.add((act, CM.action, act1 ))
        g.add((act1, CM.isa, CM.MOVE))
        g.add((act1, CM.subject, agent))
    elif 'waiting' in alert:
        g.add((act, CM.isa, CM.Report))
        g.add((act, CM.subject, agent))
        g.add((act, CM.status, Literal("wait" )))
        if "can't attack without reinforcements" in alert:
            g.add((act, CM.reason, Literal("need reinforcements")))
        act1 = bnode('ACT')
        g.add((act, CM.action, act1 ))
        g.add((act1, CM.isa, CM.Attack))
        g.add((act1, CM.subject, agent))
        g.add((act1, CM.object, target))
    elif 'resume' in alert:
        g.add((act, CM.isa, CM.Report))
        g.add((act, CM.subject, agent))
        g.add((act, CM.status, Literal("resume")))
        act1 = bnode('ACT')
        g.add((act, CM.action, act1 ))
        g.add((act1, CM.isa, CM.Move))
        g.add((act1, CM.subject, agent))
        g.add((act1, CM.object, target))
    elif 'planned battle removed' in alert:
        g.add((act, CM.isa, CM.Report))
        g.add((act, CM.subject, agent))
        g.add((act, CM.status, Literal("end")))
        if 'no real targets' in alert:
            g.add((act, CM.reason, Literal("no targets")))
        act1 = bnode('ACT')
        g.add((act, CM.action, act1 ))
        g.add((act1, CM.isa, CM.Attack))
        g.add((act1, CM.subject, agent))
        g.add((act1, CM.object, target))
    elif 'firing has stopped'in alert:
        g.add((act, CM.isa, CM.Report))
        g.add((act, CM.subject, agent))
        g.add((act, CM.Status, CM.END ))
        act1 = bnode('ACT')
        g.add((act, CM.action, act1 ))
        g.add((act1, CM.isa, CM.Attack))
        g.add((act1, CM.object, agent))
    elif 'cbe' in alert and 'terminated' in alert:
        g.add((act, CM.isa, CM.Report))
        g.add((act, CM.subject, agent))
        g.add((act, CM.subject, target))
        g.add((act, CM.reason, Literal(alert)))
    elif 'detect' in alert:
        g.add((act, CM.isa, CM.Detect))
        g.add((act, CM.subject, agent))
        g.add((act, CM.subject, target))
        g.add((act, CM.reason, Literal(alert)))
    else:
        print("Unrecognized alert:", alert)
        unrecognized_alerts[alert] += 1
        return CM.Action
    return act

### Add message objects to the graph

In [32]:
messages = []
actions = []
print(f"Read to process {len(all_data)} messages")
for msg in all_data:
    subj = bnode('MSG')
    messages.append(subj)
    g.add((subj, RDF.type, CM.CombatMessage))
    #g.add((subj, CM.task, task))
    for prop, obj in msg.items():
        #print('prop, obj:', prop, obj)
        if prop not in str2property:
            print(f"Unrecognized property {prop}")
            continue
        obj = str2object(msg, prop, obj)
        prop = str2property[prop]
        #print('prop, obj:', prop, obj)
        g.add((subj, prop, obj))
    # new properties
    g.add((subj, CM.minutes, Literal(time_to_minutes(msg['Time'])) ))
    act = add_action(msg, subj)
    actions.append(act)
g.add((simulation, CM.firstMessage, messages[0]))
g.add((simulation, CM.lastMessage, messages[-1]))
g.add((simulation, CM.firstAction, actions[0]))
g.add((simulation, CM.lastAction, actions[-1]))
for i in range(len(messages)-1):
     g.add((messages[i], CM.nextMessage, messages[i+1]))
     g.add((actions[i], CM.nextAction, actions[i+1]))
print(f"Added {len(messages)} messages and {len(actions)} actions")


Read to process 126 messages
Added 126 messages and 126 actions


In [33]:
# read cm ontology into another graph
gcm = Graph()
gcm.parse("http://purl.org/artiamas/cm", format='ttl')

<Graph identifier=Nf1abb41726d045c89003e2756c785133 (<class 'rdflib.graph.Graph'>)>

In [34]:
# import owlrl
# g2 = g + gcm
# rdfs = owlrl.RDFSClosure.RDFS_Semantics(g2, True, True, True)
# rdfs.closure()
# rdfs.flush_stored_triples()

In [35]:
# we use CM.isa for immediate types. add rdf:type assersions
for row in g.query("select ?X ?T {?X cm:isa ?T}"):
    g.add((row.X, RDF.type, row.T))
g2 = g + gcm
# Add inferred types
for row in g2.query("select ?X ?ST {?X rdf:type/rdfs:subClassOf* ?ST}"):
    g.add((row.X, RDF.type, row.ST))

In [36]:
g.serialize(format='ntriples', destination=out_file)



<Graph identifier=Nd6a4dbbbeef44c229ba15fa85855e4b3 (<class 'rdflib.graph.Graph'>)>

In [37]:
#print(g.serialize(format='ntriples'))

## fin

In [38]:
for name in instances.keys():
    print(name)


2/1_CO_(TRK)/1_BN/241_INF_BDE_(TM)
1_CO_(TRK)/1_BN/241_INF_BDE_(TM)
1_BN/241_INF_BDE_(TM)
241_INF_BDE_(TM)
B_CO/1/22_IN
1/22_IN
22_IN
SCT_PLT/1/22_IN
1/1_CO_(TRK)/1_BN/241_INF_BDE_(TM)
3/1_CO_(TRK)/1_BN/241_INF_BDE_(TM)
C_CO/1/22_IN
50_ENG_CO_(MRBC)/1/22_IN
A_CO/1/22_IN
D_CO/1/22_IN
MORTAR_PLT/1/22_IN


In [39]:
for key, value in unrecognized_alerts.items(): print(value, ':', key)

In [40]:
for key, value in objname2IRI.items(): print(value, ':', key)

TASK_iYwgR : Crossing Task [1]
OBJ_BDFvs : River Terrain [1]


In [41]:
q = """select ?A ?S where {?A CM.isa CM.Crossing; CM.subj ?S}"""

In [42]:
q

'select ?A ?S where {?A CM.isa CM.Crossing; CM.subj ?S}'

In [None]:
for row in g.query(q):
    print(row.A, row.S)

In [None]:
g.query(q)