## Populate RDF database subclasses: Amenity and Shop

This notebook loads the OSM dataset, filters the Amenity and Shop subclasses, and creates an RDF dataset from them according to a custom ontology.

In [2]:
# Libraries
import os
from pyrosm import OSM
from pathlib import Path
from rdflib import Graph, Literal, RDF, URIRef, Namespace, FOAF
from rdflib.namespace import XSD


In [3]:
# Parameters and saving folder
# The paths are assembled with pathlib / os.sep instead of hard-coded '\\'
# separators so the notebook also runs on Linux and macOS.
# Both values stay plain strings, and `savepath` keeps its trailing separator,
# because later cells build file names with `savepath + '<name>.ttl'`.
path = str(Path(os.path.abspath(os.getcwd())).absolute())
dataset = str(Path(path) / 'data' / 'padova-mestre-veneto.osm.pbf')
savepath = str(Path(path) / 'graph') + os.sep

In [4]:
# Loading dataset
# Open the .osm.pbf extract with pyrosm; the amenity and shop POIs are
# extracted from this object in the cells below.
osm = OSM(dataset)

## Amenities

In [15]:
# Extract every POI carrying an "amenity" tag, then keep only the named ones
amenities = osm.get_pois(custom_filter={"amenity": True})
has_name = amenities['name'].notna()
amenities = amenities.loc[has_name]

# Column overview (non-null counts and dtypes) of the filtered frame
amenities.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 4871 entries, 0 to 16789
Data columns (total 46 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   version           4871 non-null   int8    
 1   timestamp         4871 non-null   int64   
 2   lat               3693 non-null   float32 
 3   lon               3693 non-null   float32 
 4   id                4871 non-null   int64   
 5   tags              3312 non-null   object  
 6   changeset         3738 non-null   float64 
 7   addr:city         1186 non-null   object  
 8   addr:country      163 non-null    object  
 9   addr:housenumber  1986 non-null   object  
 10  addr:housename    17 non-null     object  
 11  addr:postcode     1411 non-null   object  
 12  addr:place        422 non-null    object  
 13  addr:street       1743 non-null   object  
 14  email             273 non-null    object  
 15  name              4871 non-null   object  
 16  opening_hours  

  gdf = get_poi_data(


In [16]:
# Preview the first 50 named amenities
amenities.head(50)

Unnamed: 0,version,timestamp,lat,lon,id,tags,changeset,addr:city,addr:country,addr:housenumber,...,social_facility,source,start_date,wikipedia,geometry,osm_type,building:levels,fast_food,hospital,landuse
0,0,0,45.332729,12.316942,27179878,"{""ferry"":""yes"",""public_transport"":""station""}",0.0,,,,...,,,,,POINT (12.31694 45.33273),node,,,,
1,0,0,45.42614,12.378482,27241169,"{""cargo"":""passengers;vehicle"",""ferry"":""yes"",""p...",0.0,,,,...,,,,,POINT (12.37848 45.42614),node,,,,
2,0,0,45.433369,12.344954,27465310,"{""ferry"":""yes"",""public_transport"":""station"",""w...",0.0,,,,...,,,,,POINT (12.34495 45.43337),node,,,,
3,0,0,45.452625,12.354618,27486304,"{""ferry"":""yes"",""name:en"":""Murano Faro"",""public...",0.0,,,,...,,,,,POINT (12.35462 45.45263),node,,,,
4,0,0,45.408813,11.903057,215541278,"{""brand"":""Q8"",""brand:wikidata"":""Q1634762"",""fue...",0.0,,,,...,,MISE - Ministero Sviluppo Economico,,,POINT (11.90306 45.40881),node,,,,
5,0,0,45.410294,11.907196,215541291,"{""brand"":""Esso Express"",""brand:wikidata"":""Q235...",0.0,,,,...,,,,,POINT (11.90720 45.41029),node,,,,
6,0,0,45.397152,11.899335,215543602,"{""brand"":""Api-Ip"",""brand:wikidata"":""Q3995933"",...",0.0,,,,...,,MISE - Ministero Sviluppo Economico,,,POINT (11.89933 45.39715),node,,,,
8,0,0,45.396347,11.888076,215823249,"{""brand"":""Api-Ip"",""fuel:diesel"":""yes"",""fuel:oc...",0.0,,,,...,,MISE - Ministero Sviluppo Economico,,,POINT (11.88808 45.39635),node,,,,
9,0,0,45.386353,11.897065,215827913,"{""automated"":""yes"",""brand"":""OIL!"",""brand:wikid...",0.0,,,,...,,MISE - Ministero Sviluppo Economico,,,POINT (11.89707 45.38635),node,,,,
10,0,0,45.389965,11.87102,215930623,"{""brand"":""Esso"",""fuel:GTL_diesel"":""yes"",""fuel:...",0.0,,,,...,,MISE - Ministero Sviluppo Economico,,,POINT (11.87102 45.38997),node,,,,


In [6]:
# Construct ontology namespaces not known by RDFlib
# OSMO is the custom OSM ontology namespace: the node URIs and the properties
# created in the cells below are all built from it.
OSMO = Namespace("http://www.dei.unipd.it/database2/customOSMOntology#")

In [7]:
# Start from an empty RDF graph that will hold the amenity triples
g = Graph()

# Register each namespace under its prefix
for prefix, namespace in (("foaf", FOAF), ("xsd", XSD), ("osmo", OSMO)):
    g.bind(prefix, namespace)


In [8]:
%%time
#measure execution time

# Optional attributes as (dataframe column, ontology property, XSD datatype).
# A triple is written for them only when the row actually holds a value.
AMENITY_OPTIONAL_FIELDS = (
    ('lat', 'hasLatitude', XSD.decimal),
    ('lon', 'hasLongitude', XSD.decimal),
    ('amenity', 'amenityType', XSD.string),
    ('website', 'website', XSD.string),
)
# Address components, in the order they are joined into one string
AMENITY_ADDRESS_COLUMNS = ('addr:street', 'addr:housenumber', 'addr:city')

# Translate every named amenity into RDF triples.
# Missing cells (NaN/None) are skipped: many rows have no lat/lon, street or
# website, and passing those cells to Literal() would store the text "nan"
# (which is not a valid xsd:decimal) instead of simply omitting the property.
for index, row in amenities.iterrows():
    # Only the fields that have a value for this amenity
    present = row.dropna()

    # Create the node to add to the Graph
    # the node has 'amenity' + id as URI
    Amenity = URIRef(OSMO["amenity" + str(row['id'])])
    g.add((Amenity, RDF.type, OSMO.Node))
    g.add((Amenity, OSMO['hasId'], Literal(row['id'], datatype=XSD.integer)))
    # 'name' is always set: unnamed amenities were filtered out beforehand
    g.add((Amenity, OSMO['name'], Literal(row['name'], datatype=XSD.string)))

    for column, prop, datatype in AMENITY_OPTIONAL_FIELDS:
        if column in present:
            g.add((Amenity, OSMO[prop], Literal(present[column], datatype=datatype)))

    # Build "street, housenumber, city" from the parts that are available;
    # no address triple is written when none of them is known.
    addr = ', '.join(str(present[column])
                     for column in AMENITY_ADDRESS_COLUMNS
                     if column in present)
    if addr:
        g.add((Amenity, OSMO['address'], Literal(addr, datatype=XSD.string)))

CPU times: total: 3.8 s
Wall time: 3.78 s


In [9]:
%%time
# Serialise the graph in the Turtle format and write it to amenity.ttl
print("--- saving serialization ---")
amenity_target = savepath + 'amenity.ttl'
with open(amenity_target, 'wb') as file:
    turtle_bytes = g.serialize(format='turtle', encoding='utf-8')
    file.write(turtle_bytes)

--- saving serialization ---
CPU times: total: 3.78 s
Wall time: 3.78 s


## Shops

In [17]:
# Extract every POI carrying a "shop" tag, then keep only the named ones
shops = osm.get_pois(custom_filter={"shop": True})
shop_has_name = shops['name'].notna()
shops = shops.loc[shop_has_name]

# Column overview (non-null counts and dtypes) of the filtered frame
shops.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 2289 entries, 0 to 3006
Data columns (total 39 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   version           2289 non-null   int8    
 1   timestamp         2289 non-null   int64   
 2   lat               2101 non-null   float32 
 3   lon               2101 non-null   float32 
 4   id                2289 non-null   int64   
 5   tags              1158 non-null   object  
 6   changeset         2106 non-null   float64 
 7   addr:city         909 non-null    object  
 8   addr:country      109 non-null    object  
 9   addr:housenumber  1350 non-null   object  
 10  addr:housename    4 non-null      object  
 11  addr:postcode     919 non-null    object  
 12  addr:place        209 non-null    object  
 13  addr:street       1226 non-null   object  
 14  email             231 non-null    object  
 15  name              2289 non-null   object  
 16  opening_hours   

  gdf = get_poi_data(


In [18]:
# Preview the first 50 named shops
shops.head(50)

Unnamed: 0,version,timestamp,lat,lon,id,tags,changeset,addr:city,addr:country,addr:housenumber,...,pastry,religion,second_hand,shop,tobacco,geometry,osm_type,agrarian,kiosk,wholesale
0,0,0,45.455078,11.856764,227024220,"{""amenity"":""fuel"",""brand"":""Agip Eni"",""fuel:die...",0.0,,,,...,,,,kiosk,,POINT (11.85676 45.45508),node,,,
1,0,0,45.410629,11.857927,256072065,"{""wheelchair"":""yes""}",0.0,,,31,...,,,,supermarket,,POINT (11.85793 45.41063),node,,,
3,0,0,45.389431,11.965428,290040241,,0.0,,,,...,,,,supermarket,,POINT (11.96543 45.38943),node,,,
4,0,0,45.435944,11.87046,296955042,,0.0,,,,...,,,,convenience,,POINT (11.87046 45.43594),node,,,
5,0,0,45.448483,11.871986,296955316,"{""brand"":""Eurospar"",""brand:wikidata"":""Q12309283""}",0.0,,,,...,,,,supermarket,,POINT (11.87199 45.44848),node,,,
6,0,0,45.440342,11.886786,296963264,,0.0,,,,...,,,,supermarket,,POINT (11.88679 45.44034),node,,,
7,0,0,45.412014,11.908236,334511575,"{""brand"":""Conad"",""brand:wikidata"":""Q639075"",""b...",0.0,Padova,,61,...,,,,supermarket,,POINT (11.90824 45.41201),node,,,
8,0,0,45.41161,11.910622,334512025,"{""branch"":""Italia SRL"",""brand"":""Decathlon"",""br...",0.0,,,,...,,,,sports,,POINT (11.91062 45.41161),node,,,
9,0,0,45.403027,11.912572,334512158,,0.0,Padova,,94/A,...,,,,supermarket,,POINT (11.91257 45.40303),node,,,
12,0,0,45.417645,11.879622,442918389,"{""addr:unit"":""2"",""brand"":""Spar"",""brand:wikidat...",0.0,,,14,...,,,,supermarket,,POINT (11.87962 45.41764),node,,,


In [19]:
# Rebind `g` to a fresh, empty RDF graph that will hold the shop triples
g = Graph()

# Register each namespace under its prefix
for prefix, namespace in (("foaf", FOAF), ("xsd", XSD), ("osmo", OSMO)):
    g.bind(prefix, namespace)

In [None]:
%%time
#measure execution time

# Optional attributes as (dataframe column, ontology property, XSD datatype).
# A triple is written for them only when the row actually holds a value.
SHOP_OPTIONAL_FIELDS = (
    ('lat', 'hasLatitude', XSD.decimal),
    ('lon', 'hasLongitude', XSD.decimal),
    ('shop', 'shopType', XSD.string),
    ('website', 'website', XSD.string),
)
# Address components, in the order they are joined into one string
SHOP_ADDRESS_COLUMNS = ('addr:street', 'addr:housenumber', 'addr:city')

# Translate every named shop into RDF triples.
# Each triple uses the shop's own URI (`Shop`) as its subject.
# Missing cells (NaN/None) are skipped: passing them to Literal() would store
# the text "nan" (not a valid xsd:decimal) instead of omitting the property.
for index, row in shops.iterrows():
    # Only the fields that have a value for this shop
    present = row.dropna()

    # Create the node to add to the Graph
    # the node has 'shop' + id as URI
    Shop = URIRef(OSMO["shop" + str(row['id'])])
    g.add((Shop, RDF.type, OSMO.Node))
    g.add((Shop, OSMO['hasId'], Literal(row['id'], datatype=XSD.integer)))
    # 'name' is always set: unnamed shops were filtered out beforehand
    g.add((Shop, OSMO['name'], Literal(row['name'], datatype=XSD.string)))

    for column, prop, datatype in SHOP_OPTIONAL_FIELDS:
        if column in present:
            g.add((Shop, OSMO[prop], Literal(present[column], datatype=datatype)))

    # Build "street, housenumber, city" from the parts that are available;
    # no address triple is written when none of them is known.
    addr = ', '.join(str(present[column])
                     for column in SHOP_ADDRESS_COLUMNS
                     if column in present)
    if addr:
        g.add((Shop, OSMO['address'], Literal(addr, datatype=XSD.string)))

In [20]:
%%time
# Serialise the graph in the Turtle format and write it to shop.ttl
print("--- saving serialization ---")
shop_target = savepath + 'shop.ttl'
with open(shop_target, 'wb') as file:
    turtle_bytes = g.serialize(format='turtle', encoding='utf-8')
    file.write(turtle_bytes)

--- saving serialization ---
CPU times: total: 0 ns
Wall time: 999 µs
