In [1]:
from rdflib import Graph
from rdflib import Namespace
from rdflib.namespace import FOAF, NamespaceManager, DCTERMS
from rdflib import URIRef, Literal, BNode
import pandas as pd
import numpy as np


In [2]:
# Read the sample table from the Excel workbook. Row 0 of the sheet supplies
# the column headers and no column is used as the row index.
dataframe = pd.read_excel(
    'data.xls',
    header=0,
    index_col=None,
)
# NumPy array of the same table, used later for positional [row][column] access.
massive = dataframe.to_numpy()

In [3]:
# Project namespaces.
# NOTE(review): THIS has no trailing '#' or '/', so attribute access such as
# THIS.contains yields ".../pollutiondbcontains" - confirm this is intended.
THIS = Namespace("http://irnok.net/ontology/geopollution/pollutiondb")
BASE = Namespace("http://irnok.net/ontology/geopollution/pollutiondb#")
GP = Namespace("http://irnok.net/ontology/geopollution#")

# DBpedia terms, used for classes, the unit and the pollutant names.
DBP = Namespace("http://dbpedia.org/page/")

# W3C vocabularies.
RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
OWL = Namespace("http://www.w3.org/2002/07/owl#")
XSD = Namespace("http://www.w3.org/2001/XMLSchema#")
XML = Namespace("http://www.w3.org/XML/1998/namespace")
WGS = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")

In [4]:
# Empty RDF graph; the cells below add every triple to this single object.
graph = Graph()

In [5]:
# Register the short prefixes the serializer may use for each vocabulary.
# No prefix is bound for the pollution-database namespace itself.
prefix_bindings = (
    ('gp', GP),
    ('dbp', DBP),
    ('owl', OWL),
    ('rdf', RDF),
    ('wgs', WGS),
    ('xml', XML),
    ('xsd', XSD),
    ('rdfs', RDFS),
)
for prefix, namespace in prefix_bindings:
    graph.bind(prefix, namespace)

In [6]:
# Ontology annotation
# From this cell on, BASE is the bare ontology IRI (a URIRef without the
# trailing '#'); it replaces the Namespace object of the same name.
BASE = URIRef("http://irnok.net/ontology/geopollution/pollutiondb")

# Graph.add returns the graph itself, which allows the calls to be chained and
# makes the graph the value displayed by this cell.
(
    graph
    .add((BASE, RDF.type, OWL.Ontology))
    .add((BASE, RDFS.label, Literal('Database of Pollutions on Olkhon Island', lang='en')))
    .add((BASE, RDFS.label, Literal('База данных загрязнений на острове Ольхон', lang='ru')))
    .add((BASE, RDF.type, OWL.NamedIndividual))
    .add((BASE, RDF.type, DBP.Database))
)

<Graph identifier=Nef187508916b42f3967601cd2f1b7b1b (<class 'rdflib.graph.Graph'>)>

In [7]:
# Annotation properties
# Every property used on samples and amount nodes is declared as an
# owl:AnnotationProperty.
annotation_properties = (
    GP.amount,
    GP.pollutedBy,
    GP.unit,
    THIS.contains,
    WGS.lat,
    WGS.long,
)
for annotation_property in annotation_properties:
    graph.add((annotation_property, RDF.type, OWL.AnnotationProperty))

graph  # the graph stays the value displayed by this cell

<Graph identifier=Nef187508916b42f3967601cd2f1b7b1b (<class 'rdflib.graph.Graph'>)>

In [8]:
# Classes
# Declare the two DBpedia terms used as rdf:type targets as OWL classes.
(
    graph
    .add((DBP.Sample, RDF.type, OWL.Class))
    .add((DBP.Database, RDF.type, OWL.Class))
)

<Graph identifier=Nef187508916b42f3967601cd2f1b7b1b (<class 'rdflib.graph.Graph'>)>

In [9]:
# Individuals
# One named individual per spreadsheet row, typed as dbp:Sample, carrying its
# coordinates and linked to one "<sample>-<element>-amount" node per element.
# NOTE(review): BASE is a plain URIRef at this point, so BASE + name is direct
# string concatenation with no '#' or '/' separator - confirm this is intended.

# Every column from the fourth one onwards is treated as an element column.
element_columns = dataframe.columns[3:]

for sample_name, latitude, longitude in zip(
    dataframe['name'], dataframe['lat'], dataframe['long']
):
    sample_id = URIRef(sample_name)
    sample_iri = BASE + sample_id

    graph.add((sample_iri, RDF.type, OWL.NamedIndividual))
    graph.add((sample_iri, RDF.type, DBP.Sample))

    # Coordinates are stored as positional (non-scientific) decimal strings.
    latitude_text = np.format_float_positional(float(latitude))
    longitude_text = np.format_float_positional(float(longitude))
    graph.add((sample_iri, WGS.lat, Literal(latitude_text)))
    graph.add((sample_iri, WGS.long, Literal(longitude_text)))

    for element in element_columns:
        amount_id = URIRef('-'.join((sample_id, element, 'amount')))
        graph.add((sample_iri, THIS.contains, BASE + amount_id))

In [10]:
# Annotation
# Describe every "<sample>-<element>-amount" node: the element it measures,
# the measured amount and the unit.
# NOTE(review): BASE is a plain URIRef here, so BASE + node name is direct
# string concatenation with no '#' or '/' separator - confirm this is intended.

# Element columns start at the fourth spreadsheet column
# (presumably after name, lat and long - TODO confirm the column order).
ELEMENT_COLUMN_START = 3

# enumerate() keeps the sample name and the row of `massive` aligned by
# position instead of mixing a label lookup with a positional one.
for row, sample_name in enumerate(dataframe['name']):
    for column in range(ELEMENT_COLUMN_START, dataframe.shape[1]):
        element = dataframe.columns[column]
        amount_node = BASE + URIRef(sample_name + '-' + element + '-amount')

        # Cells written as "<x" keep only the number after the '<'; the
        # comparison sign itself is not recorded in the graph.
        raw_value = massive[row][column]
        raw_text = str(raw_value)
        if '<' in raw_text:
            raw_value = raw_text.split('<')[-1]
        # Positional (non-scientific) decimal string, built once for both cases.
        amount_text = np.format_float_positional(float(raw_value))

        graph.add((amount_node, GP.pollutedBy, DBP[element]))
        graph.add((amount_node, GP.amount, Literal(amount_text)))
        graph.add((amount_node, GP.unit, DBP.Percentage))

In [11]:
# Render the graph as Turtle, with the ontology IRI as @base, and print it.
turtle_text = graph.serialize(base=BASE, format='ttl')
print(turtle_text)

@base <http://irnok.net/ontology/geopollution/pollutiondb> .
@prefix dbp: <http://dbpedia.org/page/> .
@prefix gp: <http://irnok.net/ontology/geopollution#> .
@prefix ns1: <http://irnok.net/ontology/geopollution/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix wgs: <http://www.w3.org/2003/01/geo/wgs84_pos#> .

gp:amount a owl:AnnotationProperty .

gp:pollutedBy a owl:AnnotationProperty .

gp:unit a owl:AnnotationProperty .

<> a dbp:Database,
        owl:NamedIndividual,
        owl:Ontology ;
    rdfs:label "Database of Pollutions on Olkhon Island"@en,
        "База данных загрязнений на острове Ольхон"@ru .

<UGS-0211> a dbp:Sample,
        owl:NamedIndividual ;
    <contains> <UGS-0211-Aluminium_oxide-amount>,
        <UGS-0211-Arsenic-amount>,
        <UGS-0211-Barium-amount>,
        <UGS-0211-Calcium_oxide-amount>,
        <UGS-0211-Carbon-amount>,
        <UGS-0211-Cerium-amount>,
        <UGS-0211-Chromium-amoun

In [12]:
# Write the Turtle serialization (same format and base as the preview) to disk.
graph.serialize(
    destination='database-from-python.ttl',
    format='ttl',
    base=BASE,
)

<Graph identifier=Nef187508916b42f3967601cd2f1b7b1b (<class 'rdflib.graph.Graph'>)>