# Practical Solution: Creating a Knowledge Graph in OWLReady2

In [1]:
!pip install owlready2
from owlready2 import *
# owlready2.JAVA_EXE = "C:\\path\\to\\java.exe" #windows users


Defaulting to user installation because normal site-packages is not writeable


We begin by importing an ontology. If we are extending an existing ontology then we would import this here. However, here we are creating an ontology from scratch, so we import a blank ontology.

In [2]:
onto = get_ontology("http://www.dummy.info/new.owl")

We will now create some *classes* (types) of entity and attach these to the ontology. One way of doing this is use `with` which "opens" the ontology object we have just created and creates our new entities within it. Each entity is created by creating a new class of type `onto.Thing`.

In [3]:

with onto: # This automatically attached the entities to the ontology
    class QUBStaff(Thing):
        pass

    class QUBStudent(Thing):
        pass

    class Program(Thing):
        pass

    class Module(Thing):
        pass

Now we can attach some attributes (properties to each of the entities). For each attribute, we will need to specify which type of entity the property should be attached to, and what the type of the allowable values is:

In [4]:
with onto:
    class staff_id(DataProperty):
        domain = [QUBStaff]
        range = [int]

    # Leave this one out initially to demonstrate reparenting
    class staff_title(DataProperty):
        domain = [QUBStaff]
        range = [str]

    class student_id(DataProperty):
        domain = [QUBStudent]
        range = [int]

    class person_name(DataProperty):
        domain = [QUBStaff,QUBStudent]
        range = [str]

    # Alternative way
    class ProgramTitle(Program >> str):
        pass

    class ProgramID(DataProperty):
        domain = [Module]
        range = [int]

    # Leave this one out initially to demonstrate reparenting
    class ProgramLength(DataProperty):
        domain = [Program]
        range = [str]

    class ModuleTitle(DataProperty):
        domain = [Module]
        range = [str]

    class ModuleID(DataProperty):
        domain = [Module]
        range = [int]

Now we specify some relations. These are specified as `ObjectProperty` and must tell us what type of `Thing` they can be between:

In [5]:
with onto:
    class teaches(ObjectProperty):
        domain = [QUBStaff]
        range = [Module]

    class enrolled_on(ObjectProperty):
        domain = [QUBStudent]
        range = [Program]

    class includes_module(ObjectProperty):
        domain = [Program]
        range = [Module]

    class takes(ObjectProperty):
        domain = [QUBStudent]
        range = [Module]

Let's save the ontology here

In [6]:
onto.save('teaching.rdf')

## Populating the Graph

Now we populate the graph. We'll include:
* Six modules
* Two programs
* Six members of academic staff
* Two members of administrative staff
* Five students

Create the modules

In [7]:
knowledgeengineering = Module(name='knowledgeengineering', ModuleTitle = ["Knowledge Engineering"], ModuleID = [8052])
introtoai = Module(name='introtoai', ModuleTitle = ["Introduction to AI"], ModuleID = [8050])
compvision = Module(name='compvision', ModuleTitle = ["Computer Vision"], ModuleID = [8053])
nlp = Module(name='nlp', ModuleTitle = ["Natural Language Processing"], ModuleID = [8054])
aiforhealth = Module(name='aiforhealth', ModuleTitle = ["AI for Health"], ModuleID = [8055])

# can also do it this way
machinelearning = Module(name='machinelearning')
machinelearning.ModuleTitle = ["MachineLearning"]
machinelearning.ModuleID = [8051]


Now we add two programs

In [8]:
mscaift = Program(name='mscaift', ProgramTitle = ['MSc AI Full-time'], ProgramID = [12345], ProgramLength = ['1 year'])
mscaipt = Program(name='mscaipt', ProgramTitle = ['MSc AI Part-time'], ProgramID = [52345], ProgramLength = ['2 years'])


Six academic staff and two administrators - we will create these just as QUB staff and let the equivalence relation do the work

In [9]:
lb = QUBStaff(name='lb', person_name = ["Lu Bai"], staff_id = [456789], staff_title = ['Dr'])
hw = QUBStaff(name='hw', person_name = ["Hui Wang"], staff_id = [945678], staff_title = ['Professor'])
ibs = QUBStaff(name='ibs', person_name = ["Iain Styles"], staff_id = [894567], staff_title = ['Professor'])
yh = QUBStaff(name='yh', person_name = ["Yang Hua"], staff_id = [789456], staff_title = ['Dr'])
bd = QUBStaff(name='bd', person_name = ["Barry Devereux"], staff_id = [678945], staff_title = ['Dr'])
rr = QUBStaff(name='rr', person_name = ["Reza Rafiee"], staff_id = [567894], staff_title = ['Dr'])
ec = QUBStaff(name='ec', person_name = ["Elaine Cranston"], staff_id = [345678], staff_title = ['Mrs'])
sk = QUBStaff(name='sk', person_name = ["Sonia Katic"], staff_id = [456783], staff_title = ['Ms'])

Finally, some students

In [10]:
at = QUBStudent(name='at', person_name = ['Alan Turing'], student_id = [234567])
gh = QUBStudent(name='gh', person_name = ['Grace Hopper'], student_id = [723456])
al = QUBStudent(name='al', person_name = ['Ada Lovelace'], student_id = [672345])
mm = QUBStudent(name='mm', person_name = ['Marvin Minsky'], student_id = [567234])
jm = QUBStudent(name='jm', person_name = ['John McCarthy'], student_id = [456723])

Now we add the relations. First, which modules are on each program

In [11]:
mscaift.includes_module = [introtoai, machinelearning, knowledgeengineering, compvision, nlp, aiforhealth]
mscaift.includes_module = [introtoai, machinelearning, compvision]

Now, which students are on each program

In [12]:
at.enrolled_on = [mscaift]
gh.enrolled_on = [mscaipt]
al.enrolled_on = [mscaift]
mm.enrolled_on = [mscaipt]
jm.enrolled_on = [mscaift]

Add students to modules

In [13]:
at.takes = [introtoai, machinelearning, knowledgeengineering, compvision, nlp, aiforhealth]
gh.takes = [introtoai, machinelearning, knowledgeengineering]
al.takes = [introtoai, machinelearning, knowledgeengineering, compvision, nlp, aiforhealth]
mm.takes = [introtoai, machinelearning, knowledgeengineering]
jm.takes = [aiforhealth]

Finally, who teaches what

In [14]:
lb.teaches = [introtoai]
hw.teaches = [machinelearning]
ibs.teaches = [knowledgeengineering]
yh.teaches = [compvision]
bd.teaches = [nlp]
rr.teaches = [aiforhealth]

In [15]:
onto.save('teaching.rdf')

## Querying the graph

Now we can construct some simple queries on the graph. 

In [16]:
print(f"{ibs.person_name[0]} teaches {ibs.teaches[0].ModuleTitle[0]}")

Iain Styles teaches Knowledge Engineering


This rapidly becomes inflexible: we want to query classes of object, and this will become very cumbersome. Fortunately there is a mechanism for this. The language designed for this is called SPARQL which is very similar to SQL. Let us see how it works with a few simple examples.

Here is a very simple query that returns everything in the dataset

In [17]:
list(default_world.sparql(
    """
    PREFIX RDF: <http://www.dummy.info/new.owl#>
    
    SELECT ?subject ?predicate ?object
    WHERE{
        ?subject ?predicate ?object
    }
    """))

[[.anonymous, 6, owl.Ontology],
 [www.dummy.info.new.owl, 6, owl.Ontology],
 [new.QUBStaff, 6, 11],
 [new.QUBStaff, 9, owl.Thing],
 [new.QUBStudent, 6, 11],
 [new.QUBStudent, 9, owl.Thing],
 [new.Program, 6, 11],
 [new.Program, 9, owl.Thing],
 [new.Module, 6, 11],
 [new.Module, 9, owl.Thing],
 [new.staff_id, 6, owl.DatatypeProperty],
 [new.staff_id, 7, new.QUBStaff],
 [new.staff_id, 8, int],
 [new.staff_title, 6, owl.DatatypeProperty],
 [new.staff_title, 7, new.QUBStaff],
 [new.staff_title, 8, str],
 [new.student_id, 6, owl.DatatypeProperty],
 [new.student_id, 7, new.QUBStudent],
 [new.student_id, 8, int],
 [new.person_name, 6, owl.DatatypeProperty],
 [new.person_name, 7, new.QUBStudent],
 [new.person_name, 7, new.QUBStaff],
 [new.person_name, 8, str],
 [new.ProgramTitle, 6, owl.DatatypeProperty],
 [new.ProgramTitle, 7, new.Program],
 [new.ProgramTitle, 8, str],
 [new.ProgramID, 6, owl.DatatypeProperty],
 [new.ProgramID, 7, new.Module],
 [new.ProgramID, 8, int],
 [new.ProgramLength, 6,


We can refine this query by, for example, restricting the predicate and the object to get specific object for which the predicate with variable object is true.

For example, to get all members of staff, we want to get all objects of type `QUBStaff`:

In [18]:
list(default_world.sparql(
    """
    PREFIX RDF: <http://www.dummy.info/new.owl#>
    
    SELECT ?x
    WHERE{
        ?x rdf:type RDF:QUBStaff
    }
    """))

[[new.lb],
 [new.hw],
 [new.ibs],
 [new.yh],
 [new.bd],
 [new.rr],
 [new.ec],
 [new.sk]]

Get all students

In [19]:
list(default_world.sparql(
    """
    PREFIX RDF: <http://www.dummy.info/new.owl#>
    
    SELECT ?student
    WHERE{
        ?student rdf:type RDF:QUBStudent
    }
    """))

[[new.at], [new.gh], [new.al], [new.mm], [new.jm]]

Get all modules and the staff who teach them

In [20]:
list(default_world.sparql(
    """
    PREFIX ONTO: <http://www.dummy.info/new.owl#>
    
    SELECT ?staff ?module
    WHERE{
        ?staff rdf:type ONTO:QUBStaff
        ?module rdf:type ONTO:Module
        ?staff ONTO:teaches ?module

    }
    """))

[[new.lb, new.introtoai],
 [new.hw, new.machinelearning],
 [new.ibs, new.knowledgeengineering],
 [new.yh, new.compvision],
 [new.bd, new.nlp],
 [new.rr, new.aiforhealth]]

Our earlier query: all staff taught by an individual

In [21]:
list(default_world.sparql(
    """
    PREFIX RDF: <http://www.dummy.info/new.owl#>
    
    SELECT ?staff ?module
    WHERE{
        ?staff rdf:type RDF:QUBStaff
        ?module rdf:type RDF:Module
        ?staff RDF:teaches ?module
        ?staff RDF:person_name "Iain Styles"
    }
    """))

[[new.ibs, new.knowledgeengineering]]

Get all students taught by each member of staff

In [22]:
list(default_world.sparql(
    """
    PREFIX RDF: <http://www.dummy.info/new.owl#>
    
    SELECT ?staff ?student
    WHERE{
        ?staff rdf:type RDF:QUBStaff
        ?student rdf:type RDF:QUBStudent
        ?module rdf:type RDF:Module
        ?staff RDF:teaches ?module
        ?student RDF:takes ?module
    }
    """))

[[new.lb, new.at],
 [new.lb, new.gh],
 [new.lb, new.al],
 [new.lb, new.mm],
 [new.hw, new.at],
 [new.hw, new.gh],
 [new.hw, new.al],
 [new.hw, new.mm],
 [new.ibs, new.at],
 [new.ibs, new.gh],
 [new.ibs, new.al],
 [new.ibs, new.mm],
 [new.yh, new.at],
 [new.yh, new.al],
 [new.bd, new.at],
 [new.bd, new.al],
 [new.rr, new.at],
 [new.rr, new.al],
 [new.rr, new.jm]]