In this notebook, the dataset consisting of mines joined with landuse and polygons are converted to a knowledge graph

In [1]:
import kglab
import icecream
import rdflib
from rdflib import Namespace

In [2]:
# specif config file and name spaces, build KG

config = "config.ini"

namespaces = {
    "ex":  "http://mine-db.org/",
    "rr": "http://www.w3.org/ns/r2rml#",
    "rml": "http://semweb.mmlab.be/ns/rml#",
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "ql": "http://semweb.mmlab.be/ns/ql#",
    "map": "http://mapping.example.com/",
    "ma": "http://www.w3.org/ns/ma-ont#",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "sd": "http://www.w3.org/ns/sparql-service-description#",
    "schema": "https://schema.org/",
    "mo": "http://purl.org/ontology/mo/",
    "dbo": "https://dbpedia.org/ontology/",
    "dbp": "https://dbpedia.org/property/",
    "dbpage": "https://dbpedia.org/page/",
    "geo": "https://www.w3.org/2003/01/geo/wgs84_pos#"
    }

kg = kglab.KnowledgeGraph(
    name = "A KG on mining",
    namespaces = namespaces,
    )


In [3]:
# insert all the nodes and edges according to the RML files 
kg.materialize(config)

INFO | 2023-04-03 18:20:59,703 | Parallelization is not supported for win32 when running as a library. If you need to speed up your data integration pipeline, please run through the command line.
INFO | 2023-04-03 18:21:11,867 | 40 mapping rules retrieved.
INFO | 2023-04-03 18:21:11,909 | Mapping partition with 28 groups generated.
INFO | 2023-04-03 18:21:11,912 | Maximum number of rules within mapping group: 3.
INFO | 2023-04-03 18:21:11,917 | Mappings processed in 12.200 seconds.
INFO | 2023-04-03 18:21:17,075 | Number of triples generated in total: 172390.


<kglab.kglab.KnowledgeGraph at 0x2920cdcf580>

In [4]:
# kg.get_ns_dict()

In [5]:
# to_print = kg.rdf_graph().serialize(format = "ttl")[0:5000]
# print(to_print)

In [6]:
# # override the namespace
# kg.add_ns("schema", "https://schema.org/", override=True, replace=True)
# kg.add_ns("dbp", "https://dbpedia.org/property/", override=True, replace=True)
# kg.add_ns("dbo", "https://dbpedia.org/ontology/", override=True, replace=True)
# kg.add_ns("ns4", "http://www.mine-db.org/", override=True, replace=True)
# kg.add_ns("ex", "http://www.mine-db.org/", override=True, replace=True)
# kg.add_ns("mine", "http://www.mine-db.org/", override=True, replace=True)
# kg.add_ns("geo", "https://www.w3.org/2003/01/geo/wgs84_pos#", override=True, replace=True)

In [7]:
# kg.get_ns_dict()

In [8]:
# to_print = kg.rdf_graph().serialize(format = "ttl")[0:2500]
# print(to_print)

In [9]:
kg.save_rdf("output/kg_before_linking.ttl")

In [16]:
kg.save_rdf("output/kg_before_linking.nt", format="nt")

In [10]:
# # read again with rdflib and change namespace
# filename = "output/kg_before_linking.ttl"
# g = rdflib.Graph()
# g.parse(filename, format='ttl')

# to_print = g.serialize(format = "ttl")[0:2500]
# print(to_print)

In [11]:
# # get the current namespace manager
# ns_mgr = g.namespace_manager

# # register the new namespace prefix
# ns_mgr.bind("ns1", "schema", override=True)
# ns_mgr.bind("ns2", "dbp", override=True)
# ns_mgr.bind("ns3", "dbo", override=True)
# ns_mgr.bind("ns4", "ex", override=True)
# ns_mgr.bind("ns5", "geo", override=True)

In [12]:
# g.serialize(destination = "output/kg_before_linking_ns_correct.ttl", format = "ttl")

In [13]:
sparql = """

SELECT *
WHERE {
 ?p ?s ?o .
} LIMIT 10
    """

for row in kg._g.query(sparql):
    print(row)
    

(rdflib.term.URIRef('http://www.mine-db.org/COM00215.00#2017#F.coc#Clean%20coal'), rdflib.term.Literal('http://www.mine-db.org/COM00215.00'), rdflib.term.URIRef('https://dbpedia.org/property/producedBy'))
(rdflib.term.URIRef('http://www.mine-db.org/COM00069.00#2007#O.bulk#Me.Cu'), rdflib.term.Literal('http://www.mine-db.org/COM00069.00'), rdflib.term.URIRef('https://dbpedia.org/property/producedBy'))
(rdflib.term.URIRef('http://www.mine-db.org/COM01276.00#2016#Con.Zn#Me.Zn'), rdflib.term.Literal('70000.0', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#float')), rdflib.term.URIRef('https://dbpedia.org/property/quantity'))
(rdflib.term.URIRef('http://www.mine-db.org/COM00603.00#2001#O.bulk#Ore%20processed'), rdflib.term.Literal('ed:O.bulk'), rdflib.term.URIRef('https://schema.org/material'))
(rdflib.term.URIRef('http://example.com/COM01241.00'), rdflib.term.Literal('Gold'), rdflib.term.URIRef('https://dbpedia.org/property/products'))
(rdflib.term.URIRef('http://www.mine-d

In [14]:
# query some mine with specific label
sparql = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?s
WHERE {
 ?s rdfs:label "Ahafo"@en .
} LIMIT 10
    """

for row in kg._g.query(sparql):
    print(row)