In [97]:
# Imports
from rdflib import Graph
from rdflib import Namespace
from rdflib.namespace import FOAF, XSD, NamespaceManager, DCTERMS
from rdflib import URIRef, Literal, BNode
import pandas as pd
import numpy as np

In [98]:
# Load the input tables.
# All three files are read with header=None / index_col=None, so rows and
# columns are addressed by their default integer labels in the cells below.
dataframe = pd.read_excel(
    'data.xls',
    header=None,
    index_col=None,
)
massive = dataframe.to_numpy()  # the same table as a plain NumPy array

# Two-column CSV; the next cell turns it into a key -> value lookup.
dictionary_data = pd.read_csv(
    'dictionary.csv',
    sep=',',
    header=None,
    index_col=None,
)

# NOTE(review): `excel` comes from a different file than `dataframe`, yet later
# cells mix the two (they index `excel` but take the column count from
# `dataframe`) — confirm both files really hold the same table.
excel = pd.read_excel(
    'Данные.xls',
    header=None,
    index_col=None,
)

In [101]:
# Build the lookup table: CSV column 0 holds the keys, column 1 the values.
# If a key occurs more than once, the last row wins.
dictionary = {
    dictionary_data[0][row]: dictionary_data[1][row]
    for row in range(dictionary_data.shape[0])
}

In [103]:
# Namespaces used to build the graph

# --- Project vocabularies ---
GP = Namespace("http://irnok.net/ontology/geopollution#")
BASE = Namespace("http://irnok.net/ontology/geopollution/pollutiondb#")
# NOTE(review): THIS has no trailing '#' or '/', so a term such as
# THIS.contains is the namespace string with 'contains' appended directly
# ("...pollutiondbcontains") — confirm this is intended.
THIS = Namespace("http://irnok.net/ontology/geopollution/pollutiondb")

# --- External vocabularies ---
DBP = Namespace("http://dbpedia.org/page/")
WGS = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")

# --- W3C vocabularies ---
# NOTE: XSD rebinds the name already imported from rdflib.namespace.
OWL = Namespace("http://www.w3.org/2002/07/owl#")
RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
XML = Namespace("http://www.w3.org/XML/1998/namespace")
XSD = Namespace("http://www.w3.org/2001/XMLSchema#")

In [104]:
# Create the empty RDF graph that the following cells populate and serialize
graph = Graph()

In [105]:
# Register the prefixes the serializer should use for each namespace.
# THIS and BASE are deliberately not listed here; they were not bound before either.
prefix_table = (
    ('gp', GP),
    ('dbp', DBP),
    ('owl', OWL),
    ('rdf', RDF),
    ('wgs', WGS),
    ('xml', XML),
    ('xsd', XSD),
    ('rdfs', RDFS),
)
for prefix, namespace in prefix_table:
    graph.bind(prefix, namespace)

In [106]:
#Ontology annotation
# BASE is rebound here: from now on it is a URIRef (no trailing '#') instead of
# the Namespace defined earlier. Later cells build node IRIs as `BASE + name`
# and pass BASE as the serialization base.
BASE = URIRef("http://irnok.net/ontology/geopollution/pollutiondb")

# The ontology node is typed three ways and labelled in English and Russian.
ontology_triples = (
    (BASE, RDF.type, OWL.Ontology),
    (BASE, RDFS.label, Literal('Database of Pollutions on Olkhon Island', lang='en')),
    (BASE, RDFS.label, Literal('База данных загрязнений на острове Ольхон', lang='ru')),
    (BASE, RDF.type, OWL.NamedIndividual),
    (BASE, RDF.type, DBP.Database),
)
for triple in ontology_triples:
    graph.add(triple)

<Graph identifier=N88bd1f9b628442f583ae43f029439af7 (<class 'rdflib.graph.Graph'>)>

In [107]:
#Annotation properties
# Declare the predicates used on samples and amount nodes.
# NOTE(review): THIS.lessThan is the only one typed as dbp:Estimation rather
# than owl:AnnotationProperty — confirm that is intended.
property_declarations = (
    (GP.amount, OWL.AnnotationProperty),
    (GP.pollutedBy, OWL.AnnotationProperty),
    (GP.unit, OWL.AnnotationProperty),
    (THIS.contains, OWL.AnnotationProperty),
    (THIS.lessThan, DBP.Estimation),
    (WGS.lat, OWL.AnnotationProperty),
    (WGS.long, OWL.AnnotationProperty),
)
for term, term_type in property_declarations:
    graph.add((term, RDF.type, term_type))

<Graph identifier=N88bd1f9b628442f583ae43f029439af7 (<class 'rdflib.graph.Graph'>)>

In [108]:
#Classes
# 'Sample_(material)' contains parentheses, so it cannot be written as
# attribute access on DBP and is built by concatenation instead.
class_iris = (
    URIRef(DBP + 'Sample_(material)'),
    DBP.Database,
)
for class_iri in class_iris:
    graph.add((class_iri, RDF.type, OWL.Class))

<Graph identifier=N88bd1f9b628442f583ae43f029439af7 (<class 'rdflib.graph.Graph'>)>

In [109]:
#Individuals
# One individual per data row of `excel` (rows 2 and below):
#   column 0  -> sample identifier (becomes the node name)
#   column 1  -> value stored under wgs:lat
#   column 2  -> value stored under wgs:long
#   column 3+ -> one measured element per column; row 1 holds the header that
#                is translated through `dictionary`
# Each sample is linked via THIS.contains to one "<sample>-<element>-amount"
# node per element column; those nodes are described in the Annotation cell.

sample_class = URIRef(DBP + 'Sample_(material)')

for row in range(2, excel.shape[0]):
    sample_id = URIRef(excel[0][row])
    sample_node = BASE + sample_id

    graph.add((sample_node, RDF.type, OWL.NamedIndividual))
    graph.add((sample_node, RDF.type, sample_class))
    graph.add((sample_node, WGS.lat, Literal(np.format_float_positional(float(excel[1][row])))))
    graph.add((sample_node, WGS.long, Literal(np.format_float_positional(float(excel[2][row])))))

    # The column bound comes from `excel`, the frame that is actually indexed
    # here (it was previously taken from the separately loaded `dataframe`).
    for col in range(3, excel.shape[1]):
        # str() turns a missing dictionary entry into the text 'None'.
        element_name = str(dictionary.get(excel[col][1]))
        amount_node = BASE + URIRef(sample_id + '-' + element_name + '-amount')
        graph.add((sample_node, THIS.contains, amount_node))

In [112]:
#Annotation
# Describe every "<sample>-<element>-amount" node:
#   gp:pollutedBy -> the DBpedia resource of the element
#   gp:amount     -> the measured value, or
#   THIS.lessThan -> the upper bound when the cell reads like "<0.5"
#   gp:unit       -> dbp:Percentage for oxides, dbp:Parts-per_notation otherwise
for row in range(2, excel.shape[0]):
    sample_name = excel[0][row]

    # The column bound comes from `excel`, the frame that is actually indexed
    # here (it was previously taken from the separately loaded `dataframe`).
    for col in range(3, excel.shape[1]):
        element = dictionary.get(excel[col][1])
        # Node name is built exactly as in the Individuals cell so the
        # THIS.contains links created there point at these nodes.
        amount_name = URIRef(sample_name + '-' + element + '-amount')
        amount_node = BASE + amount_name

        # Dictionary values may already be full DBpedia page URLs (the
        # serialized node names contain 'https://dbpedia.org/page/...'), so keep
        # only the last path segment; otherwise DBP[...] would prepend the
        # namespace to a complete URL. Bare names pass through unchanged.
        element_local_name = element.rsplit('/', 1)[-1]
        graph.add((amount_node, GP.pollutedBy, DBP[element_local_name]))

        cell = excel[col][row]
        cell_text = str(cell)
        if '<' in cell_text:
            # Below detection limit: store the number after '<' as an upper
            # bound, using the THIS.lessThan property declared earlier
            # (the undeclared GP.lessThan was used here before).
            predicate = THIS.lessThan
            number = float(cell_text.split('<')[-1])
        else:
            predicate = GP.amount
            number = float(cell)
        graph.add((amount_node, predicate, Literal(np.format_float_positional(number), datatype=XSD.float)))

        # Unit is chosen by a case-sensitive substring test on the node name.
        if 'oxide' in amount_name:
            unit = DBP.Percentage
        else:
            unit = URIRef(DBP + 'Parts-per_notation')
        graph.add((amount_node, GP.unit, unit))

In [113]:
# Preview the graph as Turtle, with BASE emitted as the document's @base
turtle_preview = graph.serialize(format='ttl', base=BASE)
print(turtle_preview)

@base <http://irnok.net/ontology/geopollution/pollutiondb> .
@prefix dbp: <http://dbpedia.org/page/> .
@prefix gp: <http://irnok.net/ontology/geopollution#> .
@prefix ns1: <http://irnok.net/ontology/geopollution/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix wgs: <http://www.w3.org/2003/01/geo/wgs84_pos#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

gp:amount a owl:AnnotationProperty .

gp:pollutedBy a owl:AnnotationProperty .

gp:unit a owl:AnnotationProperty .

<> a dbp:Database,
        owl:NamedIndividual,
        owl:Ontology ;
    rdfs:label "Database of Pollutions on Olkhon Island"@en,
        "База данных загрязнений на острове Ольхон"@ru .

<UGS-0211> a dbp:Sample_(material),
        owl:NamedIndividual ;
    <contains> <UGS-0211-https://dbpedia.org/page/Aluminium_oxide-amount>,
        <UGS-0211-https://dbpedia.org/page/Arsenic-amount>,
        <UGS-0211-https://dbpedia.org/page/Barium-amount>,
    

In [114]:
# Write the same Turtle serialization to disk
graph.serialize(
    destination='database-from-python.ttl',
    format='ttl',
    base=BASE,
)

<Graph identifier=N88bd1f9b628442f583ae43f029439af7 (<class 'rdflib.graph.Graph'>)>