# Importing and Downloading

In [None]:
!pip install neo4j

Collecting neo4j
  Downloading neo4j-5.20.0.tar.gz (202 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 203.0/203.0 kB 2.4 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Installing backend dependencies ... done
  Preparing metadata (pyproject.toml) ... done
Building wheels for collected packages: neo4j
  Building wheel for neo4j (pyproject.toml) ... done
  Created wheel for neo4j: filename=neo4j-5.20.0-py3-none-any.whl size=280771 sh

In [None]:
# @title Importing Libraries

import neo4j
from neo4j import GraphDatabase
import networkx as nx
import matplotlib.pyplot as plt
import time
import numpy as np

# Connection to the Neo4J Session

In [None]:
# @title Opening Neo4J Session
class Neo4jConnection:
    """Small wrapper around a Neo4j driver that opens one session per query.

    Errors raised while creating the driver or running a query are printed
    instead of propagated, so a failed query returns ``None``.
    """

    def __init__(self, uri, user, pwd):
        """Store the credentials and try to create the driver.

        uri, user, pwd:
            passed to ``GraphDatabase.driver`` (``user``/``pwd`` as ``auth``).
        If driver creation fails the error is printed and the driver stays
        ``None``.
        """
        self.__uri, self.__user, self.__pwd = uri, user, pwd
        self.__driver = None
        try:
            credentials = (self.__user, self.__pwd)
            self.__driver = GraphDatabase.driver(self.__uri, auth=credentials)
        except Exception as e:
            print("Failed to create the driver:", e)

    def close(self):
        """Close the underlying driver, if one was created."""
        if self.__driver is None:
            return
        self.__driver.close()

    def query(self, query, parameters=None, db=None):
        """Run a query in a fresh session and return its records as a list.

        query:
            the query text handed to ``session.run``
        parameters:
            optional query parameters
        db:
            optional database name; when ``None`` the driver's default
            session is used
        result:
            the list of records, or ``None`` if anything failed (the error
            is printed, not raised)
        Raises AssertionError when no driver is available.
        """
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            if db is None:
                session = self.__driver.session()
            else:
                session = self.__driver.session(database=db)
            records = session.run(query, parameters)
            # Materialize while the session is still open.
            response = list(records)
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response

    def reset_db(self):
        """Delete every node (and its relationships), then print a notice."""
        self.query("MATCH (n) DETACH DELETE n")
        print("Reset Executed!")

In [None]:
# @title Credentials for connecting to the DBMS

# Connection settings for the DBMS; left empty here, fill them in before use.
uri = ""
user = ""
pwd = ""

# Connection object built from the settings above.
conn = Neo4jConnection(uri=uri, user=user, pwd=pwd)

# Python-Based Knowledge Graph

In [None]:
# We use an RDF-like syntax; some symbols (e.g. ":") cannot be used because they
# are misinterpreted by Neo4j, so we use "_" instead.
#
# Built-In Vocabulary
#
# mykg_Resource
# mykg_Class
# mykg_Property
# mykg_type
# mykg_subClassOf
# mykg_subPropertyOf
# mykg_domain
# mykg_range

In [None]:
class KnowledgeGraph():
  """The class implements a KnowledgeGraph in Python, it can interact with Neo4J
     importing and exporting well-formed KGs.
  """

  def __init__(self, input: str|Neo4jConnection|None = None, meta_structure: int = 0) -> None:
    """Constructor of the class.
       input:
          -) a well-formed string of triples in a rdfs-like format
          -) a Neo4J connection to a well-formed Knowledge Graph
          -) None in the case you want to create an empty KG
       meta_structure:
          if 0 shows only the meta-classes
          if 1 shows also the meta-properties but hides their domain/range
          if 2 shows the whole meta-structure
       It creates an instance of the KnowledgeGraph class.
    """

    assert meta_structure in [0, 1, 2]

    self.meta_structure = meta_structure

    self.triples = set()
    self.classes = set()
    self.properties = set()
    self.instances = set()

    if type(input) == str:
      self.triples, self.classes, self.properties, self.instances = KnowledgeGraph.__from_text__(input)
    elif type(input) == Neo4jConnection:
      self.triples, self.classes, self.properties, self.instances = KnowledgeGraph.__from_neo4j__(input)
    elif input is None:
      pass
    else:
      raise ValueError("Error: input should be a string or a Neo4j connection")


  @staticmethod
  def __from_text__(text: str, separator: str = ",") -> tuple[set, set, set, set]:
    """Reads a text in an rdfs-like format and produces the set of triples,
       classes, properties and instances based on the syntax.
       text:
          the text to parse
       separator:
          the separator to use in between each triple
       result:
          a tuple composed by
          -) triples: the triples read from the string
          -) classes: terms appearing in class position
          -) properties: terms appearing in property position
          -) instances: terms appearing in neither positions
    """

    elements = [elem.strip() for elem in text.split(separator)]

    triples = set()
    for elem in elements:
      components = tuple(elem.split())
      if len(components) != 3:
        print(components)
        raise ValueError("Error: Each element must contain exactly three items. It should be in the format X B C, ...")
      triples.add(components)

    classes, properties, instances = KnowledgeGraph.__syntax_checker__(triples)

    # If all elements are triplets, return the list of triplets
    return triples, classes, properties, instances


  @staticmethod
  def __from_neo4j__(conn: Neo4jConnection) -> tuple[set, set, set, set]:
    """Loads an rdfs-like Knowledge Graph through a Neo4JConnection and
       classifies its terms according to the syntax.
       conn:
          the connection to Neo4J
       result:
          a tuple composed by
          -) triples: one (subject URI, edge type, object URI) per edge read
          -) classes: terms appearing in class position
          -) properties: terms appearing in property position
          -) instances: terms appearing in neither positions
    """

    records = conn.query("MATCH (node1) -[edge]-> (node2) RETURN node1, edge, node2")

    # Each record's 'edge' entry is indexed as (start node, type, end node),
    # the nodes being mappings that carry a 'mykg_URI' property.
    edges = (record.data()['edge'] for record in records)
    triples = {(edge[0]['mykg_URI'], edge[1], edge[2]['mykg_URI']) for edge in edges}

    classes, properties, instances = KnowledgeGraph.__syntax_checker__(triples)

    return triples, classes, properties, instances


  @staticmethod
  def __meta_structure__(meta_structure: int = 0) -> tuple[set, set, set]:
      """Returns the metastructure of a Knowledge Graph
         meta_structure:
            False: return only the metaclasses (and their edges)
            True: return metaclasses and metaproperties (and their edges)
      """

      meta_triples = set()
      meta_classes = set()
      meta_properties = set()

      meta_classes.add("mykg_Resource")
      meta_classes.add("mykg_Class")
      meta_classes.add("mykg_Property")
      meta_triples.add(("mykg_Class", "mykg_subClassOf", "mykg_Resource"))
      meta_triples.add(("mykg_Property", "mykg_subClassOf", "mykg_Resource"))

      if meta_structure >= 1:

          meta_properties.add("mykg_type")
          meta_properties.add("mykg_subClassOf")
          meta_properties.add("mykg_subPropertyOf")
          meta_properties.add("mykg_domain")
          meta_properties.add("mykg_range")


      if meta_structure >= 2:

          meta_triples.add(("mykg_type", "mykg_domain", "mykg_Resource"))
          meta_triples.add(("mykg_type", "mykg_range", "mykg_Class"))

          meta_triples.add(("mykg_subClassOf", "mykg_domain", "mykg_Class"))
          meta_triples.add(("mykg_subClassOf", "mykg_range", "mykg_Class"))

          meta_triples.add(("mykg_subPropertyOf", "mykg_domain", "mykg_Property"))
          meta_triples.add(("mykg_subPropertyOf", "mykg_range", "mykg_Property"))

          meta_triples.add(("mykg_domain", "mykg_domain", "mykg_Property"))
          meta_triples.add(("mykg_domain", "mykg_range", "mykg_Class"))

          meta_triples.add(("mykg_range", "mykg_domain", "mykg_Property"))
          meta_triples.add(("mykg_range", "mykg_range", "mykg_Class"))

      return meta_triples, meta_classes, meta_properties


  @staticmethod
  def __syntax_checker__(triples: set) -> tuple[set, set, set]:
    """Checks a set of triples against the rdfs-like syntax and classifies
       every term it contains.
       triples:
          set of (subject, predicate, object) triples to verify
       result:
          a tuple composed by
          -) classes: terms appearing in class position
          -) properties: terms appearing in property position
          -) instances: terms appearing in neither positions
       Raises ValueError if the KG is not legal: a 'mykg_' term that is not
       part of the vocabulary, a misuse of the meta-structure, or a term that
       falls in more than one of the three categories.
    """

    prefix = "mykg_"

    # Reference graph: the whole meta-structure, closed under all deductions.
    reference = KnowledgeGraph(meta_structure = 2)
    reference.perform_deduction(3)
    meta_triples = reference.triples
    meta_classes = reference.classes
    meta_properties = reference.properties
    meta_terms = meta_classes | meta_properties

    classes, properties, instances, undefined = set(), set(), set(), set()

    for triple in triples:
      sub, pred, obj = triple

      # Classify the terms according to the position they occupy.
      if pred == "mykg_type":
        classes.add(obj)
        if obj == "mykg_Class":
          classes.add(sub)
        elif obj == "mykg_Property":
          properties.add(sub)
        elif obj == "mykg_Resource":
          # Typed only as a resource: decided after the loop.
          undefined.add(sub)
        else:
          instances.add(sub)
      elif pred == "mykg_subClassOf":
        classes.update((sub, obj))
      elif pred in ("mykg_domain", "mykg_range"):
        classes.add(obj)
        properties.add(sub)
      elif pred == "mykg_subPropertyOf":
        properties.update((sub, obj))
      else:
        properties.add(pred)
        instances.update((sub, obj))

      sub_reserved = sub.startswith(prefix)
      obj_reserved = obj.startswith(prefix)
      pred_reserved = pred.startswith(prefix)

      # Reserved names must belong to the built-in vocabulary.
      if ((sub_reserved and sub not in meta_terms)
          or (obj_reserved and obj not in meta_terms)
          or (pred_reserved and pred not in meta_properties)):
        raise ValueError("Namespace 'mykg_' used improperly:", triple)

      # A triple may touch the meta-structure only in the allowed ways.
      allowed = (
        (not sub_reserved and not obj_reserved)
        or (not sub_reserved and pred == 'mykg_type' and obj in ['mykg_Resource', 'mykg_Class', 'mykg_Property'])
        or (not sub_reserved and pred == "mykg_subClassOf" and obj == "mykg_Resource")
        or triple in meta_triples
      )
      if not allowed:
        raise ValueError("Metastructure used improperly:", triple)

    # Terms typed only as mykg_Resource count as instances unless they were
    # also seen as a class or as a property.
    instances |= undefined - classes - properties

    # The three categories must be pairwise disjoint; checked in this order.
    clashes = (
      ("Symbols appears in classes and properties position:", classes & properties),
      ("Symbol appears in both properties and instances position:", properties & instances),
      ("Symbol appears in both classes and instances position:", classes & instances),
    )
    for message, overlap in clashes:
      if len(overlap) != 0:
        raise ValueError(message, overlap)

    return classes, properties, instances


  def add_knowledge(self, input: Neo4jConnection|str) -> None:
    """Add triples to an already formed instance of the KnowledgeGraph class.
       input:
          can be
          1) a well-formed string of triples in a rdfs-like format
          2) a Neo4J connection to a well-formed Knowledge Graph
          Subclasses of str and of Neo4jConnection are accepted as well.
       result:
          adds triples, instances, properties, classes in the KG
       Raises ValueError if input is of any other type.
       The new knowledge is syntax-checked on its own by the loader; the
       merged graph is not re-checked here.
    """

    # isinstance (rather than an exact type comparison) also admits
    # subclasses of the supported input types.
    if isinstance(input, str):
      loader = KnowledgeGraph.__from_text__
    elif isinstance(input, Neo4jConnection):
      loader = KnowledgeGraph.__from_neo4j__
    else:
      raise ValueError("Error: input should be a string or a Neo4j connection")

    triples, classes, properties, instances = loader(input)

    self.triples = self.triples.union(triples)
    self.classes = self.classes.union(classes)
    self.properties = self.properties.union(properties)
    self.instances = self.instances.union(instances)


  def __to_neo4j__(self, conn: Neo4jConnection) -> None:
    """Given a Neo4JConnection in input, puts on that connection a well-formed
       representation of an rdfs-like knowledge graph.
       conn:
          a Neo4Jconnection
       result:
          outputs on the connection the KnowledgeGraph instance
       Each node represents a resource and has a 'mykg_URI' field with its
       univocal value and has a label for each class it belongs to.
       Each edge represent a property and has the type of its predicate. A
       hierarchy of properties is represented with separate edges
    """

    node_query = 'MERGE (:mykg_Node {mykg_URI:$mykg_URI})'
    edge_query = 'MATCH (n1 {mykg_URI:$n1}), (n2 {mykg_URI:$n2}) MERGE (n1)-[:%s]->(n2)'
    # rivedere (provare a farlo senza %s)

    for c in self.classes:
      conn.query(node_query, {"mykg_URI": c})

    for p in self.properties:
      conn.query(node_query, {"mykg_URI": p})

    for i in self.instances:
      conn.query(node_query, {"mykg_URI": i})

    for sub, pred, obj in self.triples:
      query = edge_query % pred
      conn.query(query, {"n1": sub, "n2": obj})

    self.__class_to_label__(conn) #CALLED FOR DEFINING EVERYTHING BETTER


  def to_text(self) -> str:
    """Produces a textual representation of the KnowledgeGraph instance.
       result:
          the text representing the KnowledgeGraph instance
    """

    text = ""

    for t in self.triples:
      line = "{n1} {e} {n2}".format(n1 = t[0], e = t[1], n2 = t[2])
      text += line + "\n"

    text = text[0:-1]
    return text


  def __str__(self) -> str:
    return self.to_text()


  def __len__(self) -> int:
    return len(self.triples)


  def __syntax_deductions__(self) -> None:
    """Adds to the knowledge graph all the assertions that are 'immediately
       deducible' from the syntax structure (concept of 'immediately deducible'
       from slide 168).
       It adds the meta-structure and applies the following rules:
          1) every node in class position is a class
          2) every node in property position is a property
          3) every node that is neither is an instance
    """

    meta_triples, meta_classes, meta_properties = KnowledgeGraph.__meta_structure__(self.meta_structure)

    self.classes = self.classes.union(meta_classes)
    self.properties = self.properties.union(meta_properties)
    self.triples = self.triples.union(meta_triples)

    for c in self.classes:
      self.triples.add((c, "mykg_type", "mykg_Class"))

    for p in self.properties:
      self.triples.add((p, "mykg_type", "mykg_Property"))

    for i in self.instances:
      self.triples.add((i, "mykg_type", "mykg_Resource"))


  def __instance_based_completion__(self) -> None: #IMPORTANT: THIS DOESN'T USE NEO4J, we could do it using neo4J but it requires a connection!
    """ Adds to the knowledge graph all the assertions about instances that can
        be logically deduced from the graph (through the meta-properties).
        It applies the following rules:
            1) if (X rdf:type C) and (C rdfs:subClassOf D) then (X rdf:type D)
            2) if (X P Y) and (P rdfs:subPropertyOf Q) then (X Q Y)
            3) if (X P Y) and (P rdfs:domain C) then (X rdf:type C)
            4) if (X P Y) and (P rdfs:range C) then (Y rdf:type C)
        It stops when it reaches the fixpoint (no new instance-based assertions
        can be deduced from the knowledge graph).
    """

    # we must assert first that completion is already done correctly
    K1 = self.triples.copy()
    while True:

        K0 = K1.copy() # Update K0 to the current state of K1
        new_triples = set()

        # Rule 1 - if (X rdf:type C) and (C rdfs:subClassOf D) then (X rdf:type D)
        for triple1 in K1:
            if triple1[1] == "mykg_type":
                X = triple1[0]
                C = triple1[2]

                for triple2 in K1:
                    if triple2[0] == C and triple2[1] == "mykg_subClassOf":
                        D = triple2[2]
                        deduced = (X, "mykg_type", D)
                        if deduced not in K1:
                            new_triples.add(deduced)

        # Rule 2 - if (X P Y) and (P rdfs:subPropertyOf Q) then (X Q Y)
        for triple1 in K1:
            X = triple1[0]
            P = triple1[1]
            Y = triple1[2]

            for triple2 in K1:
                if triple2[0] == P and triple2[1] == "mykg_subPropertyOf":
                    Q = triple2[2]
                    deduced = (X, Q, Y)
                    if deduced not in K1:
                        new_triples.add(deduced)

        # Rule 3 - if (X P Y) and (P rdfs:domain C) then (X rdf:type C)
        for triple1 in K1:
            X = triple1[0]
            P = triple1[1]
            Y = triple1[2]

            for triple2 in K1:
                if triple2[0] == P and triple2[1] == "mykg_domain":
                    C = triple2[2]
                    deduced = (X, "mykg_type", C)
                    if deduced not in K1:
                        new_triples.add(deduced)

        # Rule 4 - if (X P Y) and (P rdfs:range C) then (Y rdf:type C)
        for triple1 in K1:
            X = triple1[0]
            P = triple1[1]
            Y = triple1[2]

            for triple2 in K1:
                if triple2[0] == P and triple2[1] == "mykg_range":
                    C = triple2[2]
                    deduced = (Y, "mykg_type", C)
                    if deduced not in K1:
                      new_triples.add(deduced)

        K1 = K1.union(new_triples)
        # Check if K1 is equal to K0
        if K1 == K0:
            self.triples=K1
            break


  def __class_property_completion__(self) -> None:
    """ Adds to the knowledge graph all the assertions about classes and
        properties that can be logically deduced from the graph (through the
        meta-properties).
        It applies the following rules:
            1) if (C1 rdfs:subClassOf C2) and (C2 rdfs:subClassOf C3) then (C1 rdfs:subClassOf C3)
            2) if (P1 rdf:subPropertyOf P2) and (P2 rdf:subPropertyOf P3) then (P1 rdf:subPropertyOf P3)
            3) if (P1 rdf:subPropertyOf P2) and (P2 rdfs:domain C) then (P1 rdfs:domain C)
            4) if (P1 rdf:subPropertyOf P2) and (P2 rdfs:range C) then (P1 rdfs:range C)
            5) if (C1 rdfs:subClassOf C2) and (P rdfs:domain C1) then (P rdfs:domain C2)
            6) if (C1 rdfs:subClassOf C2) and (P rdfs:range C1) then (P rdfs:range C2)
        It stops when it reaches the fixpoint (no new assertions about classes
        or properties can be deduced from the knowledge graph).
    """

    K1 = self.triples.copy()
    while True:
        K0 = K1.copy()

        new_triples = set()

        # Rule 1 - if (C1 rdfs:subClassOf C2) and (C2 rdfs:subClassOf C3) then (C1 rdfs:subClassOf C3)
        for triple1 in K1:
            if triple1[1] == "mykg_subClassOf":
                C1 = triple1[0]
                C2 = triple1[2]

                for triple2 in K1:
                    if triple2[0] == C2 and triple2[1] == "mykg_subClassOf":
                        C3 = triple2[2]
                        deduced = (C1, "mykg_subClassOf", C3)
                        if deduced not in K1:
                            new_triples.add(deduced)

        # Rule 2 - if (P1 rdf:subPropertyOf P2) and (P2 rdf:subPropertyOf P3) then (P1 rdf:subPropertyOf P3)
        for triple1 in K1:
            if triple1[1] == "mykg_subPropertyOf":
                P1 = triple1[0]
                P2 = triple1[2]

                for triple2 in K1:
                    if triple2[0] == P2 and triple2[1] == "mykg_subPropertyOf":
                        P3 = triple2[2]
                        deduced = (P1, "mykg_subPropertyOf", P3)
                        if deduced not in K1:
                            new_triples.add(deduced)

        # Rule 3 - if (P1 rdf:subPropertyOf P2) and (P2 rdfs:domain C) then (P1 rdfs:domain C)
        for triple1 in K1:
            if triple1[1] == "mykg_subPropertyOf":
                P1 = triple1[0]
                P2 = triple1[2]

                for triple2 in K1:
                    if triple2[0] == P2 and triple2[1] == "mykg_domain":
                        C = triple2[2]
                        deduced = (P1, "mykg_domain", C)
                        if deduced not in K1:
                            new_triples.add(deduced)

        # Rule 4 - if (P1 rdf:subPropertyOf P2) and (P2 rdfs:domain C) then (P1 rdfs:domain C)
        for triple1 in K1:
            if triple1[1] == "mykg_subPropertyOf":
                P1 = triple1[0]
                P2 = triple1[2]

                for triple2 in K1:
                    if triple2[0] == P2 and triple2[1] == "mykg_range":
                        C = triple2[2]
                        deduced = (P1, "mykg_range", C)
                        if deduced not in K1:
                            new_triples.add(deduced)

        # Rule 5 - if (C1 rdfs:subClassOf C2) and (P rdfs:domain C1) then (P rdfs:domain C2)
        for triple1 in K1:
            if triple1[1] == "mykg_subClassOf":
                C1 = triple1[0]
                C2 = triple1[2]

                for triple2 in K1:
                    if triple2[2] == C1 and triple2[1] == 'mykg_domain':
                        P = triple2[0]
                        deduced = (P, 'mykg_domain', C2)
                        if deduced not in K1:
                            new_triples.add(deduced)

        # Rule 6 - if (C1 rdfs:subClassOf C2) and (P rdfs:range C1) then (P rdfs:range C2)
        for triple1 in K1:
            if triple1[1] == "mykg_subClassOf":
                C1 = triple1[0]
                C2 = triple1[2]

                for triple2 in K1:
                    if triple2[2] == C1 and triple2[1] == 'mykg_range':
                        P = triple2[0]
                        deduced = (P, 'mykg_range', C2)
                        if deduced not in K1:
                            new_triples.add(deduced)

        K1 = K1.union(new_triples)
        # Check if K1 is equal to K0
        if K1 == K0:
            self.triples=K1
            break


  def __class_to_label__(self, conn: Neo4jConnection) -> None:
    """ Given a Neo4JConnection adds to each neo4j node the labels of classes it
        is an instance of.
        conn:
            a Neo4JConnection
    """

    find_query = 'MATCH (node)-[:mykg_type]->(class) return node,class'
    update_query = 'MATCH (node {mykg_URI:$mykg_URI}) SET node :%s RETURN node'

    results = conn.query(find_query)

    meta_triples, meta_classes, meta_properties = self.__meta_structure__(self.meta_structure)

    for n in meta_classes:
      query = update_query % 'MetaClass'
      conn.query(query, {'mykg_URI': n})

    for n in meta_properties:
      query = update_query % 'MetaProperty'
      conn.query(query, {'mykg_URI': n})

    for elem in results:
      n = elem.data()['node']['mykg_URI']
      c = elem.data()['class']['mykg_URI']
      query = update_query % c
      conn.query(query, {'mykg_URI': n})


  def perform_deduction(self, level: int = 1, conn: Neo4jConnection|None = None) -> None:
    """Perform deduction based on the level specified in input.
       level:
          can be
          1: syntax deduction
          2: syntax deduction + instance-based completion
          3: syntax deduction + instance-based completion + class-property completion
    """

    assert level in [1, 2, 3]

    if level == 1:
      self.__syntax_deductions__()
    elif level == 2:
      self.__syntax_deductions__()
      self.__instance_based_completion__()
    elif level == 3:
      self.__syntax_deductions__()
      self.__instance_based_completion__()
      self.__class_property_completion__()

    if conn is not None:
      self.__to_neo4j__(conn)

# Neo4j-Based Knowledge Graph

In [None]:
class neo4j_KnowledgeGraph():
  """Knowledge graph that is stored in, and completed by, a Neo4j database.

  Every resource is a node carrying a ``mykg_URI`` property and every triple
  ``subject predicate object`` is a relationship of type ``predicate`` going
  from the subject node to the object node. Loading, serialisation and all
  deduction levels are issued as Cypher queries through ``self.conn``.
  """

  def __init__(self, conn: Neo4jConnection, input: str|Neo4jConnection|None = None, meta_structure: int = 0) -> None:
    """Reset the database behind ``conn`` and optionally load initial triples.

    conn:
        connection to the database that will hold the graph; it is reset
        (``conn.reset_db()``) before anything is loaded
    input:
        triples as text ("S P O, S P O, ..."), another Neo4jConnection to
        copy the triples from, or None for an empty graph
    meta_structure:
        0, 1 or 2: how much of the ``mykg_`` meta-structure is materialised
        by the deductions (see ``__meta_structure__``)

    Raises ValueError if ``input`` has an unsupported type or is malformed.
    """

    assert meta_structure in [0, 1, 2]

    self.meta_structure = meta_structure

    conn.reset_db()
    self.conn = conn

    if input is not None:
      self.add_knowledge(input)


  @staticmethod
  def _quote(identifier: str) -> str:
    """Return ``identifier`` as a back-tick quoted Cypher name."""
    # Labels and relationship types cannot be query parameters; quoting keeps
    # the spliced name valid, and a literal back-tick is escaped by doubling.
    return '`' + identifier.replace('`', '``') + '`'


  @staticmethod
  def _fetch(conn: Neo4jConnection, query: str, parameters: dict|None = None) -> list:
    """Run a reading query and return its rows as plain dictionaries.

    Raises RuntimeError when the connection returns None instead of records.
    """
    records = conn.query(query, parameters)
    if records is None:
      # NOTE(review): Neo4jConnection.query appears to print the error and
      # return None on failure; fail loudly here instead of with a TypeError
      # further down.
      raise RuntimeError("Neo4j query failed: " + query)
    return [record.data() for record in records]


  @staticmethod
  def _parse_triples(text: str, separator: str = ",") -> tuple[set, set]:
    """Split ``text`` into triples; return ``(node URIs, triples)``.

    Raises ValueError if an element does not consist of exactly three
    whitespace-separated items.
    """
    nodes = set()
    triples = set()
    for elem in text.split(separator):
      components = tuple(elem.split())
      if len(components) != 3:
        print(components)
        raise ValueError("Error: Each element must contain exactly three items. It should be in the format X B C, ...")
      triples.add(components)
      nodes.add(components[0])
      nodes.add(components[2])
    return nodes, triples


  @staticmethod
  def _read_triples(source: Neo4jConnection) -> tuple[set, set]:
    """Read every relationship of ``source``; return ``(node URIs, triples)``."""
    input_query = "MATCH (node1) -[edge]-> (node2) RETURN node1, edge, node2"

    nodes = set()
    triples = set()
    for row in neo4j_KnowledgeGraph._fetch(source, input_query):
      # Record.data() renders nodes as property dicts and a relationship as
      # (start node, type, end node); only the URIs and the type are kept so
      # that the collected values are hashable.
      subject = row['node1']['mykg_URI']
      predicate = row['edge'][1]
      obj = row['node2']['mykg_URI']
      triples.add((subject, predicate, obj))
      nodes.add(subject)
      nodes.add(obj)
    return nodes, triples


  @staticmethod
  def _write_triples(conn: Neo4jConnection, nodes, triples) -> None:
    """Merge ``nodes`` and then ``triples`` into the database behind ``conn``."""
    node_query = 'MERGE (:mykg_Node {mykg_URI:$mykg_URI})'
    edge_query = 'MATCH (n1 {mykg_URI:$n1}), (n2 {mykg_URI:$n2}) MERGE (n1)-[:%s]->(n2)'

    # All nodes first: the edge query only MATCHes its endpoints, so an edge
    # whose endpoint does not exist yet would silently not be created.
    for node in nodes:
      conn.query(node_query, parameters = {'mykg_URI': node})

    for subject, predicate, obj in triples:
      query = edge_query % neo4j_KnowledgeGraph._quote(predicate)
      conn.query(query, {'n1': subject, 'n2': obj})


  @staticmethod
  def __from_text__(conn: Neo4jConnection, text: str, separator: str = ",") -> None:
    """Parse ``text`` into triples, store them through ``conn`` and validate.

    Raises ValueError for malformed text or if the syntax check fails.
    """
    nodes, triples = neo4j_KnowledgeGraph._parse_triples(text, separator)
    neo4j_KnowledgeGraph._write_triples(conn, nodes, triples)
    neo4j_KnowledgeGraph.__syntax_checker__(conn)


  @staticmethod
  def __from_neo4j__(conn: Neo4jConnection, input: Neo4jConnection) -> None:
    """Copy every triple of the database behind ``input`` into ``conn`` and validate.

    Raises ValueError if the syntax check fails.
    """
    # The triples are read from `input` (the source), not from `conn`.
    nodes, triples = neo4j_KnowledgeGraph._read_triples(input)
    neo4j_KnowledgeGraph._write_triples(conn, nodes, triples)
    neo4j_KnowledgeGraph.__syntax_checker__(conn)


  @staticmethod
  def __meta_structure__(meta_structure: int = 0) -> tuple[set, set, set]:
    """Return ``(meta_triples, meta_classes, meta_properties)`` for a level.

    Level 0 yields the three meta classes and their subclass triples, level 1
    adds the five meta properties, level 2 adds a domain and a range triple
    for each meta property.
    """
    meta_classes = {"mykg_Resource", "mykg_Class", "mykg_Property"}
    meta_triples = {
      ("mykg_Class", "mykg_subClassOf", "mykg_Resource"),
      ("mykg_Property", "mykg_subClassOf", "mykg_Resource"),
    }
    meta_properties = set()

    # (property, domain, range) of every meta property
    signatures = (
      ("mykg_type", "mykg_Resource", "mykg_Class"),
      ("mykg_subClassOf", "mykg_Class", "mykg_Class"),
      ("mykg_subPropertyOf", "mykg_Property", "mykg_Property"),
      ("mykg_domain", "mykg_Property", "mykg_Class"),
      ("mykg_range", "mykg_Property", "mykg_Class"),
    )

    if meta_structure >= 1:
      meta_properties.update(prop for prop, _, _ in signatures)

    if meta_structure >= 2:
      for prop, domain, range_ in signatures:
        meta_triples.add((prop, "mykg_domain", domain))
        meta_triples.add((prop, "mykg_range", range_))

    return meta_triples, meta_classes, meta_properties


  @staticmethod
  def __syntax_checker__(conn: Neo4jConnection) -> None:
    """Validate the graph behind ``conn``.

    Collects the URIs found in class position, in property position and on
    any node, plus the triples whose two endpoints are both in the ``mykg_``
    namespace, and compares them with a fully deduced ``KnowledgeGraph``
    built with ``meta_structure = 2``.

    Raises ValueError if the ``mykg_`` namespace or the meta-structure is
    misused, or if a symbol is used both as class and as property.
    """

    class_queries = (
      'MATCH (c)-[:mykg_type]->({mykg_URI:"mykg_Class"}) RETURN c.mykg_URI as URI',
      'MATCH (c)-[:mykg_subClassOf]->(c2) RETURN c.mykg_URI as URI',
      'MATCH (c2)-[:mykg_subClassOf]->(c) RETURN c.mykg_URI as URI',
      'MATCH (p)-[:mykg_domain]->(c) RETURN c.mykg_URI as URI',
      'MATCH (p)-[:mykg_range]->(c) RETURN c.mykg_URI as URI',
      'MATCH (i)-[:mykg_type]->(c) RETURN c.mykg_URI as URI'
    )

    property_queries = (
      'MATCH (p)-[:mykg_type]->({mykg_URI:"mykg_Property"}) RETURN p.mykg_URI as URI',
      'MATCH (p)-[:mykg_subPropertyOf]->(p2) RETURN p.mykg_URI as URI',
      'MATCH (p2)-[:mykg_subPropertyOf]->(p) RETURN p.mykg_URI as URI',
      'MATCH (p)-[:mykg_domain]->(c) RETURN p.mykg_URI as URI',
      'MATCH (p)-[:mykg_range]->(c) RETURN p.mykg_URI as URI',
      'MATCH (i1)-[p]->(i2) RETURN type(p) as URI'
    )

    instance_query = 'MATCH (n) RETURN n.mykg_URI as URI'

    meta_query = """MATCH (c1)-[p]->(c2)
                    WHERE c1.mykg_URI STARTS WITH "mykg_" and c2.mykg_URI STARTS WITH "mykg_"
                    RETURN c1.mykg_URI as sub ,type(p) as pred, c2.mykg_URI as obj"""

    # Reference: everything the complete meta-structure allows.
    full_meta_structure = KnowledgeGraph(meta_structure = 2)
    full_meta_structure.perform_deduction(3)
    full_meta_triples = full_meta_structure.triples
    full_meta_classes = full_meta_structure.classes
    full_meta_properties = full_meta_structure.properties

    fetch = neo4j_KnowledgeGraph._fetch
    classes = {row['URI'] for query in class_queries for row in fetch(conn, query)}
    properties = {row['URI'] for query in property_queries for row in fetch(conn, query)}
    instances = {row['URI'] for row in fetch(conn, instance_query)}
    meta_triples = {(row['sub'], row['pred'], row['obj']) for row in fetch(conn, meta_query)}

    # Whatever is neither a class, a property nor a meta symbol is an instance.
    instances = instances - classes - properties - full_meta_classes - full_meta_properties

    cond1 = any(x.startswith('mykg_') and x not in full_meta_classes for x in classes)
    cond2 = any(x.startswith('mykg_') for x in instances)
    cond3 = any(x.startswith('mykg_') and x not in full_meta_properties for x in properties)
    if cond1 or cond2 or cond3:
      raise ValueError("Namespace 'mykg_' used improperly")

    if not meta_triples.issubset(full_meta_triples):
      raise ValueError("Metastructure used improperly")

    if classes & properties:
      raise ValueError("Symbols appears in classes and properties position:", classes & properties)
    # NOTE(review): `instances` was made disjoint from `classes` and
    # `properties` above, so the two branches below cannot fire as written;
    # detecting a class/property used in instance position needs a separate
    # "instance position" query — confirm the intended rule.
    elif properties & instances:
      raise ValueError("Symbol appears in both properties and instances position:", properties & instances)
    elif classes & instances:
      raise ValueError("Symbol appears in both classes and instances position:", classes & instances)


  def add_knowledge(self, input: Neo4jConnection|str) -> None:
    """Merge additional triples into this graph's database.

    input:
        triples as text ("S P O, S P O, ...") or a Neo4jConnection whose
        triples are copied

    Raises ValueError for any other input type, for malformed text, or if
    the syntax check fails after loading.
    """

    # Always write through this graph's own connection, never a global one.
    if isinstance(input, str):
      neo4j_KnowledgeGraph.__from_text__(self.conn, input)
    elif isinstance(input, Neo4jConnection):
      neo4j_KnowledgeGraph.__from_neo4j__(self.conn, input)
    else:
      raise ValueError("Error: input should be a string or a Neo4j connection")


  def to_text(self) -> str:
    """Return every triple as a ``subject predicate object`` line, newline-separated."""

    query = """MATCH (n1)-[p]->(n2)
               RETURN n1.mykg_URI as n1, type(p) as p, n2.mykg_URI as n2"""

    rows = self._fetch(self.conn, query)
    return "\n".join("{n1} {p} {n2}".format(n1 = row['n1'], p = row['p'], n2 = row['n2']) for row in rows)


  def __str__(self) -> str:
    """Same as ``to_text()``."""
    return self.to_text()


  def __len__(self) -> int:
    """Return the number of relationships (triples) stored in the database."""
    query = "MATCH (node1) -[edge]-> (node2) RETURN count(*) as len"
    return self._fetch(self.conn, query)[0]["len"]


  def _saturate(self, queries) -> None:
    """Re-run the counting ``queries`` until their total match count stops changing."""
    previous = 0
    while True:
      matched = sum(self._fetch(self.conn, query)[0]['count(*)'] for query in queries)
      if matched == previous:
        break
      previous = matched


  def __syntax_deductions__(self):
    """Materialise the meta-structure and type every symbol by its position.

    Symbols in class position get ``mykg_type mykg_Class``, symbols in
    property position get ``mykg_type mykg_Property`` and all remaining nodes
    get ``mykg_type mykg_Resource``.
    """

    class_queries = (
      'MATCH (c)-[:mykg_subClassOf]->(c2), (class {mykg_URI: "mykg_Class"}) MERGE (c)-[:mykg_type]->(class)',
      'MATCH (c2)-[:mykg_subClassOf]->(c), (class {mykg_URI: "mykg_Class"}) MERGE (c)-[:mykg_type]->(class)',
      'MATCH (p)-[:mykg_domain]->(c), (class {mykg_URI: "mykg_Class"}) MERGE (c)-[:mykg_type]->(class)',
      'MATCH (p)-[:mykg_range]->(c), (class {mykg_URI: "mykg_Class"}) MERGE (c)-[:mykg_type]->(class)',
      'MATCH (i)-[:mykg_type]->(c), (class {mykg_URI: "mykg_Class"}) MERGE (c)-[:mykg_type]->(class)'
    )

    property_queries = (
      'MATCH (p)-[:mykg_subPropertyOf]->(p2), (prop {mykg_URI: "mykg_Property"}) MERGE (p)-[:mykg_type]->(prop)',
      'MATCH (p2)-[:mykg_subPropertyOf]->(p), (prop {mykg_URI: "mykg_Property"}) MERGE (p)-[:mykg_type]->(prop)',
      'MATCH (p)-[:mykg_domain]->(c), (prop {mykg_URI: "mykg_Property"}) MERGE (p)-[:mykg_type]->(prop)',
      'MATCH (p)-[:mykg_range]->(c), (prop {mykg_URI: "mykg_Property"}) MERGE (p)-[:mykg_type]->(prop)',
      # The property node is merged on its own, with the same label used when
      # loading triples, so a property first seen as a relationship type and
      # later mentioned as a node resolves to a single node.
      'MATCH (i1)-[p]->(i2), (prop {mykg_URI: "mykg_Property"}) WHERE NOT type(p) STARTS WITH "mykg_" '
      'WITH DISTINCT type(p) AS uri, prop '
      'MERGE (n:mykg_Node {mykg_URI: uri}) MERGE (n)-[:mykg_type]->(prop)'
    )

    # NOTE(review): the two patterns of the first MATCH are matched jointly,
    # so if there is no class or no property both lists come out empty —
    # confirm that typing every node as mykg_Resource is acceptable then.
    instance_query = """MATCH (c)-[:mykg_type]->({mykg_URI:"mykg_Class"}), (p)-[:mykg_type]->({mykg_URI:"mykg_Property"})
                        WITH collect(distinct c) as list1, collect(distinct p) as list2
                        MATCH (n), (res {mykg_URI:"mykg_Resource"})
                        WHERE NOT n in list1 AND NOT n in list2
                        MERGE (n)-[:mykg_type]->(res)"""

    meta_triples, meta_classes, meta_properties = self.__meta_structure__(self.meta_structure)

    typing_triples = {(c, "mykg_type", "mykg_Class") for c in meta_classes}
    typing_triples |= {(p, "mykg_type", "mykg_Property") for p in meta_properties}

    # Every meta node is created before any meta edge, so the outcome does
    # not depend on the iteration order of the sets.
    self._write_triples(self.conn, meta_classes | meta_properties, typing_triples | meta_triples)

    for query in class_queries:
      self.conn.query(query)

    for query in property_queries:
      self.conn.query(query)

    self.conn.query(instance_query)


  def __instance_based_completion__(self):
    """Propagate instance-level facts until nothing new is matched.

    Instances are typed with the superclasses of their classes, relationships
    are repeated for super-properties, and subjects/objects are typed with the
    domain/range of the property that links them.
    """

    subclass_query = """MATCH (n1)-[:mykg_type]->(n2)-[:mykg_subClassOf]->(n3)
                        MERGE (n1)-[:mykg_type]->(n3)
                        RETURN count(*)"""

    # The relationship type comes from data (n4.mykg_URI); the APOC procedure
    # is used to merge a relationship whose type is a runtime value.
    subproperty_query = """MATCH (n1)-[e]->(n2), (n3)-[:mykg_subPropertyOf]->(n4)
                          WHERE n3.mykg_URI = type(e)
                          CALL apoc.merge.relationship(n1, n4.mykg_URI, {}, {}, n2, {})
                          YIELD rel
                          RETURN count(*)"""

    domain_query = """MATCH (n1)-[e]->(n2), (n3)-[:mykg_domain]->(n4)
                      WHERE type(e) = n3.mykg_URI
                      MERGE (n1)-[:mykg_type]->(n4)
                      RETURN count(*)"""

    range_query = """MATCH (n1)-[e]->(n2), (n3)-[:mykg_range]->(n4)
                     WHERE type(e) = n3.mykg_URI
                     MERGE (n2)-[:mykg_type]->(n4)
                     RETURN count(*)"""

    self._saturate((subclass_query, subproperty_query, domain_query, range_query))


  def __class_property_completion__(self):
    """Close the schema until nothing new is matched.

    subClassOf and subPropertyOf are made transitive, sub-properties inherit
    domain and range, and domain/range are widened to superclasses.
    """

    subclass_query = """MATCH (n1)-[:mykg_subClassOf]->(n2)-[:mykg_subClassOf]->(n3)
                        MERGE (n1)-[:mykg_subClassOf]->(n3)
                        RETURN count(*)"""

    subproperty_query = """MATCH (n1)-[:mykg_subPropertyOf]->(n2)-[:mykg_subPropertyOf]->(n3)
                          MERGE (n1)-[:mykg_subPropertyOf]->(n3)
                          RETURN count(*)"""

    domain_query = """MATCH (n1)-[:mykg_subPropertyOf]->(n2)-[:mykg_domain]->(n3)
                      MERGE (n1)-[:mykg_domain]->(n3)
                      RETURN count(*)"""

    range_query = """MATCH (n1)-[:mykg_subPropertyOf]->(n2)-[:mykg_range]->(n3)
                     MERGE (n1)-[:mykg_range]->(n3)
                     RETURN count(*)"""

    domain_query2 = """MATCH (c1)-[:mykg_subClassOf]->(c2), (p)-[:mykg_domain]->(c1)
                       MERGE (p)-[:mykg_domain]->(c2)
                       RETURN count(*)"""

    range_query2 = """MATCH (c1)-[:mykg_subClassOf]->(c2), (p)-[:mykg_range]->(c1)
                      MERGE (p)-[:mykg_range]->(c2)
                      RETURN count(*)"""

    self._saturate((subclass_query, subproperty_query, domain_query,
                    range_query, domain_query2, range_query2))


  def __class_to_label__(self) -> None:
    """Mirror ``mykg_type`` membership as Neo4j node labels.

    Meta classes and meta properties are labelled ``MetaClass`` and
    ``MetaProperty``; every ``(node)-[:mykg_type]->(class)`` pair adds a
    label named after the class' ``mykg_URI`` to the node.
    """

    find_query = 'MATCH (node)-[:mykg_type]->(class) return node,class'
    update_query = 'MATCH (node {mykg_URI:$mykg_URI}) SET node :%s RETURN node'

    # Read the memberships before any label is written.
    memberships = self._fetch(self.conn, find_query)

    _, meta_classes, meta_properties = self.__meta_structure__(self.meta_structure)

    for uri in meta_classes:
      self.conn.query(update_query % self._quote('MetaClass'), {'mykg_URI': uri})

    for uri in meta_properties:
      self.conn.query(update_query % self._quote('MetaProperty'), {'mykg_URI': uri})

    for row in memberships:
      label = self._quote(row['class']['mykg_URI'])
      self.conn.query(update_query % label, {'mykg_URI': row['node']['mykg_URI']})


  def perform_deduction(self, level: int = 1) -> None:
    """Run the deductions up to ``level`` and refresh the node labels.

    level:
        1: syntax deduction
        2: syntax deduction + instance-based completion
        3: syntax deduction + instance-based completion + class-property completion
    """

    assert level in [1, 2, 3]

    # The levels are cumulative.
    self.__syntax_deductions__()
    if level >= 2:
      self.__instance_based_completion__()
    if level >= 3:
      self.__class_property_completion__()

    self.__class_to_label__()

# Examples

## Example 1

Syntax Checker on Metastructure (slide 6)

In [None]:
legal = """
mykg_Class mykg_type mykg_Resource,
mykg_Resource mykg_type mykg_Resource,
Person mykg_type mykg_Class
"""

illegal1 = """
mykg_type mykg_type mykg_Class,
mykg_subPropertyOf mykg_type mykg_Class
"""

illegal2 = """
mykg_Person mykg_type mykg_Resource
"""

In [None]:
kg = KnowledgeGraph(legal)

In [None]:
kg = KnowledgeGraph(illegal1)

ValueError: ('Metastructure used improperly:', ('mykg_type', 'mykg_type', 'mykg_Class'))

In [None]:
kg = KnowledgeGraph(illegal2)

ValueError: ("Namespace 'mykg_' used improperly:", ('mykg_Person', 'mykg_type', 'mykg_Resource'))

## Example 2

Complete deduction of the full metastructure (slide 13)

In [None]:
conn.reset_db()
kg = KnowledgeGraph(meta_structure = 2)
kg.perform_deduction(level = 3, conn=conn)

# can be visualized both in neo4j and python
print(kg)

Reset Executed!
mykg_type mykg_type mykg_Resource
mykg_subPropertyOf mykg_range mykg_Resource
mykg_domain mykg_range mykg_Class
mykg_range mykg_domain mykg_Property
mykg_domain mykg_type mykg_Resource
mykg_Resource mykg_type mykg_Class
mykg_range mykg_type mykg_Resource
mykg_Property mykg_type mykg_Class
mykg_subPropertyOf mykg_domain mykg_Property
mykg_subClassOf mykg_domain mykg_Class
mykg_range mykg_range mykg_Class
mykg_subClassOf mykg_range mykg_Class
mykg_subClassOf mykg_type mykg_Resource
mykg_type mykg_domain mykg_Resource
mykg_domain mykg_domain mykg_Resource
mykg_Property mykg_subClassOf mykg_Resource
mykg_subPropertyOf mykg_type mykg_Resource
mykg_type mykg_type mykg_Property
mykg_subPropertyOf mykg_range mykg_Property
mykg_Class mykg_type mykg_Resource
mykg_Class mykg_subClassOf mykg_Resource
mykg_domain mykg_type mykg_Property
mykg_range mykg_type mykg_Property
mykg_range mykg_domain mykg_Resource
mykg_subClassOf mykg_domain mykg_Resource
mykg_type mykg_range mykg_Resource

## Example 3

Syntax Deduction about user-defined classes, properties and instances (slide 16)

In [None]:
# Shows:
# -) infer that Student, Researcher and Person are classes
# -) infer that hasSupervisor is a property
# -) infer that Frank and Jeen are instances

text = """
Student mykg_subClassOf Person,
Researcher mykg_subClassOf Person,
hasSupervisor mykg_range Researcher,
hasSupervisor mykg_domain Student,
Frank hasSupervisor Jeen
"""

In [None]:
conn.reset_db()
kg = KnowledgeGraph(text)
kg.perform_deduction(level = 1, conn=conn)

# can be visualized both in neo4j and python
print(kg)

Reset Executed!
mykg_Class mykg_subClassOf mykg_Resource
hasSupervisor mykg_type mykg_Property
Student mykg_type mykg_Class
mykg_Resource mykg_type mykg_Class
hasSupervisor mykg_domain Student
Researcher mykg_subClassOf Person
mykg_Property mykg_type mykg_Class
Student mykg_subClassOf Person
hasSupervisor mykg_range Researcher
Frank mykg_type mykg_Resource
Researcher mykg_type mykg_Class
Person mykg_type mykg_Class
Jeen mykg_type mykg_Resource
mykg_Property mykg_subClassOf mykg_Resource
mykg_Class mykg_type mykg_Class
Frank hasSupervisor Jeen


## Example 4

Instance-Based deduction (slide 18)

In [None]:
# Shows:
# -) Frank belongs to the classes Student and Person (rules 1 and 3)
# -) Jeen belongs to the classes Researcher and Person (rules 1 and 4)
# -) Frank knows Jeen (rule 2)
# -) (also deduces that classes and properties are resources)

text = """
Student mykg_subClassOf Person,
Researcher mykg_subClassOf Person,
hasSupervisor mykg_range Researcher,
hasSupervisor mykg_domain Student,
Frank hasSupervisor Jeen,
hasSupervisor mykg_subPropertyOf knows
"""

# added the last triple

In [None]:
conn.reset_db()
kg = KnowledgeGraph(text)
kg.perform_deduction(level = 2, conn=conn)

# can be visualized both in neo4j and python
print(kg)

Reset Executed!
Person mykg_type mykg_Resource
mykg_Class mykg_subClassOf mykg_Resource
knows mykg_type mykg_Property
Frank knows Jeen
hasSupervisor mykg_type mykg_Property
Student mykg_type mykg_Class
knows mykg_type mykg_Resource
mykg_Resource mykg_type mykg_Class
hasSupervisor mykg_domain Student
Jeen mykg_type Person
Researcher mykg_subClassOf Person
hasSupervisor mykg_type mykg_Resource
mykg_Property mykg_type mykg_Class
hasSupervisor mykg_subPropertyOf knows
Student mykg_subClassOf Person
hasSupervisor mykg_range Researcher
mykg_Resource mykg_type mykg_Resource
Student mykg_type mykg_Resource
mykg_Property mykg_type mykg_Resource
Frank mykg_type mykg_Resource
Researcher mykg_type mykg_Class
Person mykg_type mykg_Class
Jeen mykg_type mykg_Resource
mykg_Property mykg_subClassOf mykg_Resource
Jeen mykg_type Researcher
mykg_Class mykg_type mykg_Class
Frank mykg_type Student
Frank mykg_type Person
Frank hasSupervisor Jeen
mykg_Class mykg_type mykg_Resource
Researcher mykg_type mykg_Re

## Example 5

Class-Property deduction (slide 21)

In [None]:
# Shows:
# -) Student and Researcher are subclasses of Being (rule 1)
# -) isDoctorateOf is a subproperty of knows (rule 2)
# -) isDoctorateOf has Student as domain and Researcher as range (rules 3 and 4)
# -) hasSupervisor also has Person as domain and as range (rules 5 and 6)
# -) and many more ...

text = """
Student mykg_subClassOf Person,
Researcher mykg_subClassOf Person,
hasSupervisor mykg_range Researcher,
hasSupervisor mykg_domain Student,
Frank hasSupervisor Jeen,
hasSupervisor mykg_subPropertyOf knows,
Person mykg_subClassOf Being,
isDoctorateOf mykg_subPropertyOf hasSupervisor
"""

# the last two triples are new with respect to Example 4

In [None]:
conn.reset_db()
kg = KnowledgeGraph(text)
kg.perform_deduction(level = 3, conn=conn)

# can be visualized both in neo4j and python
print(kg)

## Example 6

Neo4j Graph Structure and Adding Knowledge (slide 25)

In [None]:
text1 = """
Person mykg_type mykg_Class,
hasChild mykg_type mykg_Property,
Lenzerini hasChild Giovanni,
hasChild mykg_domain Person,
hasChild mykg_range Person
"""

text2 = """
Entity mykg_subClassOf Mortal,
Person mykg_subClassOf Entity
"""

In [None]:
conn.reset_db()
kg = KnowledgeGraph(text1, meta_structure = 1)
kg.perform_deduction(level = 3, conn=conn)

# can be visualized both in neo4j and python
print(kg)

Reset Executed!
mykg_range mykg_type mykg_Property
mykg_type mykg_type mykg_Property
mykg_domain mykg_type mykg_Resource
Person mykg_type mykg_Resource
Lenzerini mykg_type Person
mykg_subClassOf mykg_type mykg_Resource
Person mykg_type mykg_Class
Giovanni mykg_type Person
mykg_subPropertyOf mykg_type mykg_Property
hasChild mykg_type mykg_Resource
Lenzerini mykg_type mykg_Resource
Giovanni mykg_type mykg_Resource
mykg_Class mykg_subClassOf mykg_Resource
mykg_range mykg_type mykg_Resource
mykg_type mykg_type mykg_Resource
mykg_Property mykg_subClassOf mykg_Resource
mykg_domain mykg_type mykg_Property
mykg_Class mykg_type mykg_Resource
mykg_Class mykg_type mykg_Class
mykg_subClassOf mykg_type mykg_Property
mykg_subPropertyOf mykg_type mykg_Resource
hasChild mykg_domain Person
mykg_Resource mykg_type mykg_Resource
hasChild mykg_range Person
mykg_Resource mykg_type mykg_Class
mykg_Property mykg_type mykg_Resource
mykg_Property mykg_type mykg_Class
Lenzerini hasChild Giovanni
hasChild mykg_t

In [None]:
kg.add_knowledge(text2)
kg.perform_deduction(level = 3, conn=conn)

# can be visualized both in neo4j and python
print(kg)

Giovanni mykg_type Mortal
mykg_type mykg_type mykg_Property
Person mykg_type mykg_Resource
Lenzerini mykg_type Person
Giovanni mykg_type Entity
Person mykg_type mykg_Class
Entity mykg_subClassOf Mortal
hasChild mykg_type mykg_Resource
Giovanni mykg_type mykg_Resource
mykg_Class mykg_subClassOf mykg_Resource
mykg_range mykg_type mykg_Resource
mykg_Class mykg_type mykg_Resource
hasChild mykg_domain Entity
hasChild mykg_range Mortal
mykg_Class mykg_type mykg_Class
hasChild mykg_range Entity
mykg_subPropertyOf mykg_type mykg_Resource
hasChild mykg_domain Mortal
mykg_Resource mykg_type mykg_Resource
mykg_Resource mykg_type mykg_Class
Entity mykg_type mykg_Resource
Lenzerini hasChild Giovanni
Entity mykg_type mykg_Class
hasChild mykg_type mykg_Property
Lenzerini mykg_type Mortal
mykg_range mykg_type mykg_Property
mykg_domain mykg_type mykg_Resource
Lenzerini mykg_type Entity
mykg_subClassOf mykg_type mykg_Resource
Giovanni mykg_type Person
mykg_subPropertyOf mykg_type mykg_Property
Mortal my

## Example 7

Implementation with Neo4j and Comparison between the two (slide 26)

In [None]:
text1 = """
Person mykg_type mykg_Class,
hasChild mykg_type mykg_Property,
Lenzerini hasChild Giovanni,
hasChild mykg_domain Person,
hasChild mykg_range Person
"""

text2 = """
Entity mykg_subClassOf Mortal,
Person mykg_subClassOf Entity,
hasChild mykg_type mykg_Class
"""

In [None]:
kg2 = neo4j_KnowledgeGraph(conn, text1, meta_structure = 1)
kg2.perform_deduction(level = 3)

# can be visualized both in neo4j and python
print(kg2)

Reset Executed!
hasChild mykg_domain Person
hasChild mykg_type mykg_Property
hasChild mykg_range Person
Person mykg_type mykg_Class
Lenzerini hasChild Giovanni
mykg_Property mykg_type mykg_Class
mykg_Class mykg_type mykg_Class
mykg_Resource mykg_type mykg_Class
mykg_range mykg_type mykg_Property
mykg_subClassOf mykg_type mykg_Property
mykg_type mykg_type mykg_Property
mykg_subPropertyOf mykg_type mykg_Property
mykg_domain mykg_type mykg_Property
mykg_Class mykg_subClassOf mykg_Resource
mykg_Property mykg_subClassOf mykg_Resource
Giovanni mykg_type mykg_Resource
Lenzerini mykg_type mykg_Resource
mykg_Resource mykg_type mykg_Resource
mykg_Class mykg_type mykg_Resource
mykg_Property mykg_type mykg_Resource
Person mykg_type mykg_Resource
mykg_domain mykg_type mykg_Resource
mykg_subPropertyOf mykg_type mykg_Resource
mykg_type mykg_type mykg_Resource
mykg_subClassOf mykg_type mykg_Resource
mykg_range mykg_type mykg_Resource
hasChild mykg_type mykg_Resource
Lenzerini mykg_type Person
Giovanni

In [None]:
kg2.add_knowledge(text2)
kg2.perform_deduction(level = 3)

# can be visualized both in neo4j and python
print(kg2)

ValueError: ('Symbols appears in classes and properties position:', {'hasChild'})

In [None]:
# Re-parse the printed form of both graphs (one triple per line) and compare
# the results, so that the comparison does not depend on the print order.
# NOTE(review): assumes the first value returned by
# KnowledgeGraph.__from_text__ is the set of triples — confirm against its
# definition.
kg_triples, _, _, _ = KnowledgeGraph.__from_text__(kg.__str__(), separator = '\n')
kg2_triples, _, _, _ = KnowledgeGraph.__from_text__(kg2.__str__(), separator = '\n')

# Last expression of the cell: its value is shown as the cell output.
kg2_triples == kg_triples

True

## Example 8

Time comparison (slide 30)

In [None]:
text1 = """
Marco loves Arianna,
loves mykg_domain Person,
loves mykg_range Person
"""

###

text2 = """
Employee mykg_type mykg_Class,
Office mykg_type mykg_Class,
worksIn mykg_type mykg_Property,
isHeadOf mykg_type mykg_Property,
worksIn mykg_domain Employee,
isHeadOf mykg_domain Employee,
worksIn mykg_range Office,
isHeadOf mykg_range Office,
isHeadOf mykg_subPropertyOf worksIn,
John mykg_type Employee,
Mary mykg_type Employee,
SalesOffice mykg_subClassOf Office,
Zara mykg_type SalesOffice,
John worksIn Zara,
Mary isHeadOf Zara,
Office mykg_subClassOf Unit,
Laboratory mykg_subClassOf Unit
"""

###

text3 = """
Student mykg_subClassOf Person,
Researcher mykg_subClassOf Person,
hasSupervisor mykg_range Researcher,
hasSupervisor mykg_domain Student,
Frank hasSupervisor Jeen
"""

###

text4 = """
Animal mykg_subClassOf Being,
Mammal mykg_subClassOf Animal,
Human mykg_subClassOf Mammal,
Cow mykg_subClassOf Mammal,
Student mykg_subClassOf Human,
Researcher mykg_subClassOf Human,
isFriendOf mykg_subPropertyOf knows,
isBestFriendOf mykg_subPropertyOf isFriendOf,
isBroOf mykg_subPropertyOf isBestFriendOf,
isFriendOf mykg_domain Cow,
isBestFriendOf mykg_range Human,
Mark mykg_type Student,
Bob mykg_type Cow,
Bob mykg_type Researcher,
Bob isBroOf Mark,
John isBroOf Jeff
"""

###

text5 = """
Professor mykg_subClassOf Researcher,
Researcher mykg_subClassOf Person,
Follows mykg_domain Student,
Follows mykg_range Course,
Teaches mykg_domain Professor,
Teaches mykg_range Course,
Francesco Follows NeuroEngineering,
Lenzerini Teaches DataManagement,
Maria Follows ArtificialIntelligence,
AsksThesis mykg_domain Student,
AsksThesis mykg_range Professor,
Mario AsksThesis Lenzerini,
Luca AsksThesis Silvestri
"""

###

text6 = """
Commands mykg_subPropertyOf worksWith,
Commands mykg_subPropertyOf isSuperiorOf,
Employee mykg_subClassOf Person,
Boss mykg_subClassOf Employee,
worksWith mykg_domain Employee,
isSuperiorOf mykg_domain Boss,
worksWith mykg_range Employee,
Mario Commands Luca
"""

###

text7 = """
Person mykg_type mykg_Class,
hasChild mykg_type mykg_Property,
Lenzerini hasChild Giovanni,
hasChild mykg_domain Person,
hasChild mykg_range Person
"""

###

text8 = """
Entity mykg_subClassOf Mortal,
Person mykg_subClassOf Entity
"""

###

text9 = """
hasChild mykg_type mykg_Resource,
Antonio hasChild Luca,
mykg_subPropertyOf mykg_domain mykg_Property,
mykg_subPropertyOf mykg_range mykg_Property
"""

###

text10 = """
A mykg_subClassOf INIZIO,
B mykg_subClassOf A,
C mykg_subClassOf B,
D mykg_subClassOf C,
E mykg_subClassOf D,
F mykg_subClassOf E,
G mykg_subClassOf F,
H mykg_subClassOf G,
I mykg_subClassOf H,
J mykg_subClassOf I,
K mykg_subClassOf J,
L mykg_subClassOf K,
M mykg_subClassOf L,
N mykg_subClassOf M,
O mykg_subClassOf N,
P mykg_subClassOf O,
Q mykg_subClassOf P,
R mykg_subClassOf Q,
S mykg_subClassOf R,
T mykg_subClassOf S,
U mykg_subClassOf T,
V mykg_subClassOf U,
W mykg_subClassOf V,
X mykg_subClassOf W,
Y mykg_subClassOf X,
Z mykg_subClassOf Y,
z1 mykg_type Z,
z2 mykg_type Z,
z3 mykg_type Z,
z4 mykg_type Z,
z5 mykg_type Z,
z6 mykg_type Z,
z7 mykg_type Z,
z8 mykg_type Z,
z9 mykg_type Z,
z10 mykg_type Z,
z11 mykg_type Z,
z12 mykg_type Z,
z13 mykg_type Z,
z14 mykg_type Z,
z15 mykg_type Z,
z16 mykg_type Z,
z17 mykg_type Z,
z18 mykg_type Z,
z19 mykg_type Z,
z20 mykg_type Z,
z21 mykg_type Z,
z22 mykg_type Z,
z23 mykg_type Z,
z24 mykg_type Z
"""

In [None]:
# on python, without writing on neo4j

conn.reset_db()
kg = KnowledgeGraph(text1, meta_structure = 2)
for extra_text in (text2, text3, text4, text5, text6, text7, text8, text9, text10):
  kg.add_knowledge(extra_text)

# Only the deduction is timed. perf_counter is the high-resolution clock
# meant for measuring durations and is not affected by system clock changes.
t1 = time.perf_counter()
kg.perform_deduction(level = 3)
t2 = time.perf_counter()
print(t2-t1)

Reset Executed!
10.086181402206421


In [None]:
# on neo4j

conn.reset_db()
kg = neo4j_KnowledgeGraph(conn, text1, meta_structure = 2)
for extra_text in (text2, text3, text4, text5, text6, text7, text8, text9, text10):
  kg.add_knowledge(extra_text)

# Only the deduction is timed. perf_counter is the high-resolution clock
# meant for measuring durations and is not affected by system clock changes.
t1 = time.perf_counter()
kg.perform_deduction(level = 3)
t2 = time.perf_counter()
print(t2-t1)

Reset Executed!
Reset Executed!
Query failed: {code: Neo.ClientError.Statement.SyntaxError} {message: Procedure call inside a query does not support naming results implicitly (name explicitly using `YIELD` instead) (line 3, column 27 (offset: 135))
"                          CALL apoc.merge.relationship(n1, n4.mykg_URI, {}, {}, n2, {})"
                           ^}


TypeError: 'NoneType' object is not subscriptable

In [None]:
# on python, writing on neo4j

conn.reset_db()
kg = KnowledgeGraph(text1, meta_structure = 2)
for extra_text in (text2, text3, text4, text5, text6, text7, text8, text9, text10):
  kg.add_knowledge(extra_text)

# The timed span covers the deduction and the export to Neo4j (conn is given).
# perf_counter is the high-resolution clock meant for measuring durations and
# is not affected by system clock changes.
t1 = time.perf_counter()
kg.perform_deduction(level = 3, conn = conn)
t2 = time.perf_counter()
print(t2-t1)

Reset Executed!
90.01014304161072


# Datasets

In [None]:
# Shows:
# -) syntactic correctness and syntax deduction
# -) the deduction that Marco and Arianna are persons
test1right = """
Marco loves Arianna,
loves mykg_domain Person,
loves mykg_range Person
"""

###

# Shows:
# -) the syntax check (raises ValueError), since Person appears both in
#    class position and in instance position
test1wrong = """
Marco loves Person,
loves mykg_domain Person,
loves mykg_range Person
"""

###

# Shows:
# -) the example reported on the slides
test2 = """
Employee mykg_type mykg_Class,
Office mykg_type mykg_Class,
worksIn mykg_type mykg_Property,
isHeadOf mykg_type mykg_Property,
worksIn mykg_domain Employee,
isHeadOf mykg_domain Employee,
worksIn mykg_range Office,
isHeadOf mykg_range Office,
isHeadOf mykg_subPropertyOf worksIn,
John mykg_type Employee,
Mary mykg_type Employee,
SalesOffice mykg_subClassOf Office,
Zara mykg_type SalesOffice,
John worksIn Zara,
Mary isHeadOf Zara,
Office mykg_subClassOf Unit,
Laboratory mykg_subClassOf Unit
"""

###

# Shows:
# -) deriving the class of instances from range/domain
# -) deriving the range/domain of the class from the subclass
# -) deriving a property node from its usage
# -) deriving the membership of an instance in the superclasses
test3 = """
Student mykg_subClassOf Person,
Researcher mykg_subClassOf Person,
hasSupervisor mykg_range Researcher,
hasSupervisor mykg_domain Student,
Frank hasSupervisor Jeen
"""

###

test4 = """
Animal mykg_subClassOf Being,
Mammal mykg_subClassOf Animal,
Human mykg_subClassOf Mammal,
Cow mykg_subClassOf Mammal,
Student mykg_subClassOf Human,
Researcher mykg_subClassOf Human,
isFriendOf mykg_subPropertyOf knows,
isBestFriendOf mykg_subPropertyOf isFriendOf,
isBroOf mykg_subPropertyOf isBestFriendOf,
isFriendOf mykg_domain Cow,
isBestFriendOf mykg_range Human,
Mark mykg_type Student,
Bob mykg_type Cow,
Bob mykg_type Researcher,
Bob isBroOf Mark,
John isBroOf Jeff
"""

###

# Shows:
# -) a world of our own invention

# every professor is a person and a researcher
# every researcher is a person
# two persons can be friends
# every course is attended by students
# every course belongs to a degree programme
# every course is taught by at least one professor
# every lover is also a friend (one must be friends in order to be in love)
# every student can ask for a thesis on a course they follow

test5 = """
Professor mykg_subClassOf Researcher,
Researcher mykg_subClassOf Person,
Follows mykg_domain Student,
Follows mykg_range Course,
Teaches mykg_domain Professor,
Teaches mykg_range Course,
Francesco Follows NeuroEngineering,
Lenzerini Teaches DataManagement,
Maria Follows ArtificialIntelligence,
AsksThesis mykg_domain Student,
AsksThesis mykg_range Professor,
Mario AsksThesis Lenzerini,
Luca AsksThesis Silvestri
"""

###

# Dataset where one property (Commands) is a sub-property of two different
# properties (worksWith and isSuperiorOf), whose domains are Employee and
# Boss respectively; Boss -> Employee -> Person is the class chain, and the
# only instance-level fact is "Mario Commands Luca".
# The dataset text is one triple per line, separated by ",\n" and wrapped in
# a leading and a trailing newline.
test6 = "\n" + ",\n".join(
    (
        "Commands mykg_subPropertyOf worksWith",
        "Commands mykg_subPropertyOf isSuperiorOf",
        "Employee mykg_subClassOf Person",
        "Boss mykg_subClassOf Employee",
        "worksWith mykg_domain Employee",
        "isSuperiorOf mykg_domain Boss",
        "worksWith mykg_range Employee",
        "Mario Commands Luca",
    )
) + "\n"

###

# Demonstrates:
# -) a deduction that happens when two datasets are integrated
# The dataset text is one triple per line, separated by ",\n" and wrapped in
# a leading and a trailing newline.
test7a = "\n" + ",\n".join(
    (
        "Person mykg_type mykg_Class",
        "hasChild mykg_type mykg_Property",
        "Lenzerini hasChild Giovanni",
        "hasChild mykg_domain Person",
        "hasChild mykg_range Person",
    )
) + "\n"

###

# Class hierarchy only: Person -> Entity -> Mortal.
# NOTE(review): presumably the second dataset meant to be integrated with
# test7a (which declares Person) - confirm against the cell that loads it.
test7b = "\n" + ",\n".join(
    (
        "Entity mykg_subClassOf Mortal",
        "Person mykg_subClassOf Entity",
    )
) + "\n"

###

# Dataset that makes statements about the mykg_ vocabulary itself:
# hasChild is typed as mykg_Resource, and mykg_subPropertyOf is given
# mykg_Property as both its domain and its range; the only instance-level
# fact is "Antonio hasChild Luca".
# The dataset text is one triple per line, separated by ",\n" and wrapped in
# a leading and a trailing newline.
test8 = "\n" + ",\n".join(
    (
        "hasChild mykg_type mykg_Resource",
        "Antonio hasChild Luca",
        "mykg_subPropertyOf mykg_domain mykg_Property",
        "mykg_subPropertyOf mykg_range mykg_Property",
    )
) + "\n"

###

# Dataset that declares mykg_Resource as a subclass of the user-defined
# class Person (itself a subclass of Being).
# NOTE(review): the "_wrong" suffix suggests this input is expected to be
# rejected - confirm against the parser/validator.
test9_wrong = "\n" + ",\n".join(
    (
        "Person mykg_subClassOf Being",
        "mykg_Resource mykg_subClassOf Person",
    )
) + "\n"

###

# Dataset made only of statements about the mykg_ vocabulary itself:
# mykg_type, mykg_domain, mykg_Class and mykg_Resource each appear as the
# subject of a triple.
# The dataset text is one triple per line, separated by ",\n" and wrapped in
# a leading and a trailing newline.
test_metastructure = "\n" + ",\n".join(
    (
        "mykg_type mykg_type mykg_Resource",
        "mykg_domain mykg_domain mykg_Resource",
        "mykg_Class mykg_type mykg_Class",
        "mykg_Resource mykg_type mykg_Resource",
    )
) + "\n"

###

# Single-triple dataset that types mykg_type itself as a mykg_Class.
# NOTE(review): the "_wrong" suffix suggests this input is expected to be
# rejected - confirm against the parser/validator.
test_metastructure_wrong = "\nmykg_type mykg_type mykg_Class\n"

###

# Large regular dataset:
# -) a 26-step subclass chain Z -> Y -> ... -> B -> A -> INIZIO
# -) 24 instances z1..z24, all typed as the deepest class Z
# The triples are generated rather than written out by hand; the resulting
# text is one triple per line, separated by ",\n" and wrapped in a leading
# and a trailing newline.
test_long = "\n" + ",\n".join(
    [
        # "A" hangs off the root "INIZIO"; every other letter hangs off the
        # letter that precedes it in the alphabet.
        "{} mykg_subClassOf {}".format(
            chr(code), "INIZIO" if code == ord("A") else chr(code - 1)
        )
        for code in range(ord("A"), ord("Z") + 1)
    ]
    + ["z{} mykg_type Z".format(index) for index in range(1, 25)]
) + "\n"

###

# Minimal sub-property dataset: one isFriendOf fact between Marco and Paolo,
# plus the declaration that isFriendOf is a sub-property of knows.
test10 = "\n" + ",\n".join(
    (
        "Marco isFriendOf Paolo",
        "isFriendOf mykg_subPropertyOf knows",
    )
) + "\n"