In [1]:
import csv, re, requests
from rdflib import Dataset, URIRef, Literal, Namespace, RDF, RDFS, OWL, XSD
from iribaker import to_iri
from SPARQLWrapper import SPARQLWrapper, JSON

In [2]:
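# Optional: geomet can parse WKT geometries. GitHub no longer serves the
# unauthenticated git:// protocol, so install over https instead.
# from geomet import wkt
# pip install git+https://github.com/geomet/geomet.git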

In [2]:
TUTORIAL_REPOSITORY = "http://stardog.krw.d2s.labs.vu.nl/group12"


def upload_to_stardog(data, repository=TUTORIAL_REPOSITORY):
    """Upload TriG-serialized RDF to a Stardog database inside one transaction.

    The upload is three HTTP POSTs: begin a transaction, add the data to it,
    and commit it. Each step is checked before the next one is attempted, so
    a rejected payload can no longer be masked by a successful (empty) commit.

    Args:
        data: the request body to add, sent as 'application/trig'.
        repository: base URL of the database; defaults to TUTORIAL_REPOSITORY.

    Returns:
        The HTTP status code, as a string, of the commit request when every
        step succeeded, or of the first step that failed otherwise.
    """
    # Start the transaction, and get a transaction_id
    response = requests.post(repository + "/transaction/begin",
                             headers={'Accept': 'text/plain'})
    if not response.ok:
        # Without a transaction id there is nothing sensible to POST to.
        return str(response.status_code)
    # .text (not .content) so the id is a text string; strip stray whitespace
    # so it can be embedded safely in the URLs below.
    transaction_id = response.text.strip()

    # POST the data to the transaction
    post_url = repository + "/" + transaction_id + "/add"
    response = requests.post(post_url, data=data,
                             headers={'Accept': 'text/plain',
                                      'Content-type': 'application/trig'})
    if not response.ok:
        # Discard the open transaction instead of committing it empty, and
        # report the status of the add step that actually failed.
        requests.post(repository + "/transaction/rollback/" + transaction_id)
        return str(response.status_code)

    # Close the transaction
    response = requests.post(repository + "/transaction/commit/" + transaction_id)

    return str(response.status_code)

In [3]:
# Characters allowed in the local part of a minted street URI:
# ASCII letters, digits and the hyphen. Everything else is filtered out.
VALID_CHARS = list(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "-"
)

In [4]:
# Base IRI under which all of our resources (instances) are minted
data = 'http://data.krw.d2s.labs.vu.nl/group12/resource/'
# Base IRI for our vocabulary terms (schema information: RDFS/OWL classes, properties, ...)
vocab = 'http://data.krw.d2s.labs.vu.nl/group12/vocab/'
DATA, VOCAB = Namespace(data), Namespace(vocab)

# The named graph that will hold our triples lives under the resource base IRI
graph_uri = URIRef(data + 'trafficlightgraph')

# Create the dataset and register the prefixes used when serializing it
dataset = Dataset()
dataset.bind('g12data', DATA)
dataset.bind('g12vocab', VOCAB)

# Ask the dataset for the graph object identified by our graph URI
graph = dataset.graph(graph_uri)

# GeoSPARQL and Simple Features namespaces, used for the geometry triples
GSP = Namespace('http://www.opengis.net/ont/geosparql#')
GSF = Namespace('http://www.opengis.net/ont/sf#')

In [5]:
# UTF-8 byte-order mark as it shows up at the start of the first header cell
# when the file is read as byte strings (this notebook targets Python 2).
UTF8_BOM = '\xef\xbb\xbf'

# Read the complete CSV here and close the file immediately. Keeping the rows
# in a list (instead of a live reader on an open file) means the cell that
# builds the triples can be re-run without re-running this one.
with open('../Milestone 1/data/VERKEERSLICHTEN.csv', 'r') as myFile:
    reader = csv.reader(myFile, delimiter=',', quotechar='"')
    # next() instead of reader.next(): same result, not tied to Python 2
    headers = next(reader)
    verkeerslichten = list(reader)

# Drop the BOM only when the full marker is present; slicing avoids the
# IndexError that split(...)[1] raises on a partial match.
if headers[0].startswith(UTF8_BOM):
    headers[0] = headers[0][len(UTF8_BOM):]

# Column positions, looked up by header name so the column order may change.
# headers.index raises ValueError if an expected column is missing.
idIDX = headers.index('OBJECTNUMMER')
kpnIDX = headers.index('Kruispuntnummer')
krpIDX = headers.index('Kruispunt')
gerIDX = headers.index('Geregeld_sinds_schatting')
verIDX = headers.index('Vervallen_sinds_schatting')
regIDX = headers.index('Regeling')
srtIDX = headers.index('Soort')
kopIDX = headers.index('Koppeling')
staIDX = headers.index('Status')
webIDX = headers.index('Weblink')
cooIDX = headers.index('COORDS')
latIDX = headers.index('CENTERLAT2')
lngIDX = headers.index('CENTERLNG2')

In [6]:
# Load the externally defined schema into the default graph (context) of the dataset
dataset.default_context.parse('vocab.ttl', format='turtle')

# Separators between street names in the Kruispunt column:
# ' /' followed by any number of spaces, or ' - '
splitpattern = re.compile(r' / *| - ')


def _year_literal(raw):
    """Return an xsd:gYear Literal for a year cell, or None if it is blank or '0'."""
    if raw.strip() in ('', '0'):
        return None
    return Literal(raw, datatype=XSD['gYear'])


def _street_resources(kruispunt_text):
    """Yield a (street URI, Dutch label) pair for every street named in a Kruispunt cell."""
    for part in splitpattern.split(kruispunt_text):
        name = part.strip()
        # NOTE(review): '_' is not in VALID_CHARS, so the underscore that
        # replaces each space is filtered out again ("Van X" -> "VanX").
        # Left unchanged because other data may already link to these URIs.
        slug = ''.join([c for c in name.replace(' ', '_') if c in VALID_CHARS])
        if not slug:
            # Nothing usable left: skip it rather than typing the bare
            # resource namespace IRI as a Street.
            continue
        yield URIRef(to_iri(data + slug)), Literal(name, lang='nl')


# http://dbpedia.org/page/Amsterdam
for row in verkeerslichten:
    object_id = row[idIDX]

    # first create the URI's
    # NOTE(review): the junction is keyed on OBJECTNUMMER, not on
    # Kruispuntnummer, so every traffic light gets its own junction resource.
    verkeerslicht      = URIRef(to_iri(data + 'TrafficLight/' + object_id))
    verkeerslichtNaam  = Literal('Trafficlight' + object_id, datatype=XSD['string'])
    kruispunt          = URIRef(to_iri(data + 'TrafficlightJunction/' + object_id))
    kruispuntNaam      = Literal('TrafficlightJunction' + object_id, datatype=XSD['string'])
    kruispuntNummer    = Literal(row[kpnIDX], datatype=XSD['int'])
    # Blank and '0' year cells yield None, so no empty gYear literal is emitted
    geregeldSinds      = _year_literal(row[gerIDX])
    vervallenSinds     = _year_literal(row[verIDX])
    status             = Literal(row[staIDX], lang='nl')
    regeling           = Literal(row[regIDX], lang='nl')
    soort              = Literal(row[srtIDX], lang='nl')
    koppeling          = Literal(row[kopIDX], lang='nl')
    weblink            = URIRef(to_iri(row[webIDX])) if row[webIDX].strip() != '' else None
    # The CENTERLAT2 / CENTERLNG2 columns (Dutch decimal comma) are currently
    # not emitted as lat/lng triples; only the WKT literal below is used.
    coords             = Literal(row[cooIDX], datatype=GSP.wktLiteral)
    myTLPoint          = URIRef(to_iri(data + 'TrafficLight/' + object_id + '/Point'))
    # Same 'TrafficlightJunction/' casing as the junction URI above, so the
    # point lives under the junction it belongs to.
    myTLJPoint         = URIRef(to_iri(data + 'TrafficlightJunction/' + object_id + '/Point'))

    # second create the actual triples
    graph.add((verkeerslicht, RDF.type, VOCAB['Trafficlight']))
    # Debug trace of the first triple of each row (one value per line);
    # the parenthesised form prints the same on Python 2 and 3.
    print(verkeerslicht)
    print(RDF.type)
    print(VOCAB['Trafficlight'])
    graph.add((verkeerslicht, RDFS.label, verkeerslichtNaam))
    graph.add((verkeerslicht, GSP.hasGeometry, myTLPoint))
    graph.add((verkeerslicht, VOCAB['trafficlightJunction'], kruispunt))
    if geregeldSinds is not None:
        graph.add((verkeerslicht, VOCAB['geregeldSinds'], geregeldSinds))
    if vervallenSinds is not None:
        graph.add((verkeerslicht, VOCAB['vervallenSinds'], vervallenSinds))
    graph.add((verkeerslicht, VOCAB['status'], status))
    graph.add((verkeerslicht, VOCAB['regeling'], regeling))
    graph.add((verkeerslicht, VOCAB['soort'], soort))
    graph.add((verkeerslicht, VOCAB['koppeling'], koppeling))
    if weblink is not None:
        graph.add((verkeerslicht, VOCAB['weblink'], weblink))

    graph.add((kruispunt, RDF.type, VOCAB['TrafficlightJunction']))
    graph.add((kruispunt, RDFS.label, kruispuntNaam))
    graph.add((kruispunt, VOCAB['trafficlight'], verkeerslicht))
    graph.add((kruispunt, VOCAB['kruispuntnummer'], kruispuntNummer))
    for uri, naam in _street_resources(row[krpIDX]):
        graph.add((uri, RDF.type, VOCAB['Street']))
        graph.add((uri, RDFS.label, naam))
        graph.add((kruispunt, VOCAB['street'], uri))
    graph.add((kruispunt, GSP.hasGeometry, myTLJPoint))

    # The traffic light and its junction each get their own Point resource
    # carrying the same WKT literal.
    graph.add((myTLPoint, RDF.type, GSF.Point))
    graph.add((myTLPoint, GSP.asWKT, coords))
    graph.add((myTLJPoint, RDF.type, GSF.Point))
    graph.add((myTLJPoint, GSP.asWKT, coords))

# close the csv file (harmless if it has already been closed)
myFile.close()

# print 'response.status_code = ' + upload_to_stardog(dataset.serialize(format='trig'))

http://data.krw.d2s.labs.vu.nl/group12/resource/TrafficLight/1
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://data.krw.d2s.labs.vu.nl/group12/vocab/Trafficlight
http://data.krw.d2s.labs.vu.nl/group12/resource/TrafficLight/2
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://data.krw.d2s.labs.vu.nl/group12/vocab/Trafficlight
http://data.krw.d2s.labs.vu.nl/group12/resource/TrafficLight/3
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://data.krw.d2s.labs.vu.nl/group12/vocab/Trafficlight
http://data.krw.d2s.labs.vu.nl/group12/resource/TrafficLight/4
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://data.krw.d2s.labs.vu.nl/group12/vocab/Trafficlight
http://data.krw.d2s.labs.vu.nl/group12/resource/TrafficLight/5
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://data.krw.d2s.labs.vu.nl/group12/vocab/Trafficlight
http://data.krw.d2s.labs.vu.nl/group12/resource/TrafficLight/6
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://data.krw.d2s.labs.vu.nl/group12/vocab

In [12]:
#print dataset.serialize(format='trig')

In [13]:
# with open('trafficlights-rdf.trig','w') as f:
#     dataset.serialize(f, format='trig')

In [10]:
# SPARQL query: up to ten resources typed as gr12vocab:Verkeerslicht.
# Written line by line so the exact text (including the trailing space
# after the PREFIX declaration) is explicit.
query = (
    "\n"
    "PREFIX gr12vocab: <http://data.krw.d2s.labs.vu.nl/group12/vocab/> \n"
    "\n"
    "SELECT * WHERE {\n"
    "  ?x a gr12vocab:Verkeerslicht .\n"
    "}LIMIT 10"
)

# The query endpoint sits directly under the repository URL
endpoint = '%s/query' % TUTORIAL_REPOSITORY

# Wrapper configured with the query and JSON as the requested result format
sparql = SPARQLWrapper(endpoint)
sparql.setQuery(query)
sparql.setReturnFormat(JSON)

# Kept as the cell's last expression: its return value is the cell output
sparql.addParameter('Accept', 'application/sparql-results+json')

True

In [11]:
#sparql.addParameter('reasoning','false')
#response = sparql.query().convert()
#print response

In [12]:
#sparql.addParameter('reasoning','true')
#response = sparql.query().convert()
#print response