# RDF graph processing against the integrated POIs

#### Auxiliary function to format SPARQL query results as a data frame:

In [None]:
import pandas as pds

def sparql_results_frame(qres):
    """Tabulate SPARQL query results as a pandas DataFrame.

    Args:
        qres: Query result exposing its projected variables as ``vars`` and
            yielding rows that can be indexed by each of those variables.

    Returns:
        A DataFrame with one column per entry of ``qres.vars`` (in that
        order) and one record per result row.
    """
    variables = qres.vars

    # Pick the bindings out of every row in the order the variables are listed.
    records = [[binding[var] for var in variables] for binding in qres]

    # Side effect: sets the global pandas option 'display.max_colwidth' to 0
    # (presumably so that long URIs are not truncated when displayed -- confirm).
    pds.set_option('display.max_colwidth', 0)

    return pds.DataFrame(records, columns=variables)

#### Create an **RDF graph** with the triples resulting from data integration:

In [None]:
from rdflib import Graph,URIRef

# Location of the N-Triples file produced by the data integration
triples_file = './output/integrated.nt'

# Start from an empty graph and populate it from the file
g = Graph()
g.parse(triples_file, format="nt")

# Graph size, i.e. the number of statements it now holds
len(g)

#### Number of statements per predicate:

In [None]:
# Count the statements per predicate, most frequent predicate first
predicate_count_query = """SELECT ?p (COUNT(*) AS ?cnt) {
      ?s ?p ?o .
   } GROUP BY ?p ORDER BY DESC(?cnt)"""

# Evaluate the SPARQL query against the graph
qres = g.query(predicate_count_query)

# To see the raw rows instead of a table, print them directly:
#for row in qres:
#    print("%s %s" % row)

# Show the results as a table
sparql_results_frame(qres)

#### Identify POIs having _**name**_ similar to a user-specified one:

In [None]:
# Find POIs whose name value starts with the given text (case-insensitive regex)
name_search_query = """PREFIX slipo: <http://slipo.eu/def#>
       PREFIX provo: <http://www.w3.org/ns/prov#>
    SELECT DISTINCT ?poiURI ?title
    WHERE { ?poiURI slipo:name ?n .
            ?n slipo:nameValue ?title .
            FILTER regex(?title, "^Achilleio", "i") 
        }
"""

# Evaluate the SPARQL query against the graph
qres = g.query(name_search_query)

# Show the matching POI URIs and names as a table
sparql_results_frame(qres)

#### **Fusion action** regarding a specific POI:

In [None]:
# For POIs whose name contains the given text (case-insensitive) and that have
# an address with a street, fetch the node they were derived from together
# with its default fusion action and fusion confidence.
fusion_action_query = """PREFIX slipo: <http://slipo.eu/def#>
       PREFIX provo: <http://www.w3.org/ns/prov#>
    SELECT ?prov ?defaultAction ?conf
    WHERE { ?poiURI provo:wasDerivedFrom ?prov .
            ?poiURI slipo:name ?n .
            ?n slipo:nameValue ?title .
            ?poiURI slipo:address ?a .
            ?a slipo:street ?s .
            ?prov provo:default-fusion-action ?defaultAction .
            ?prov provo:fusion-confidence ?conf .
            FILTER regex(?title, "Achilleio", "i")
        }
"""

# Evaluate the SPARQL query against the graph
qres = g.query(fusion_action_query)

# Report how many rows came back
result_count = len(qres)
print("Query returned %d results." % result_count)

# Show the results as a table
sparql_results_frame(qres)

#### **Pair of original POIs** involved in this fusion:

In [None]:
# Look up the left and right POI URIs, plus the fusion confidence, recorded
# on one specific fusion node.
fusion_pair_query = """PREFIX slipo: <http://slipo.eu/def#>
       PREFIX provo: <http://www.w3.org/ns/prov#>
    SELECT ?leftURI ?rightURI ?conf
    WHERE { <http://www.provbook.org/d494ddbd-9a98-39b0-bec9-0477636c42f7> provo:left-uri ?leftURI .
            <http://www.provbook.org/d494ddbd-9a98-39b0-bec9-0477636c42f7> provo:right-uri ?rightURI .
            <http://www.provbook.org/d494ddbd-9a98-39b0-bec9-0477636c42f7> provo:fusion-confidence ?conf .
        }
"""

# Evaluate the SPARQL query against the graph
qres = g.query(fusion_pair_query)

# Report how many rows came back
result_count = len(qres)
print("Query returned %d results." % result_count)

# Show the pair of POI URIs and the fusion confidence as a table
sparql_results_frame(qres)

#### Values per attribute **before and after fusion** regarding this POI:

In [None]:
# For the POI derived from the given fusion node, list each applied fusion
# action with the attribute it concerns and the left, right and fused values.
# ?attr is projected so that every row identifies the attribute it describes;
# without it the values cannot be related to an attribute.
qres = g.query(
    """PREFIX slipo: <http://slipo.eu/def#>
       PREFIX provo: <http://www.w3.org/ns/prov#>
    SELECT DISTINCT ?attr ?valLeft ?valRight ?valFused
    WHERE { ?poiURI provo:wasDerivedFrom <http://www.provbook.org/d494ddbd-9a98-39b0-bec9-0477636c42f7> .
            ?poiURI provo:appliedAction ?action .
            ?action provo:attribute ?attr .
            ?action provo:left-value ?valLeft .
            ?action provo:right-value ?valRight .
            ?action provo:fused-value ?valFused .
        }
""")

# Report how many rows came back
print("Query returned %d results." % len(qres))

# Show the per-attribute values as a table
sparql_results_frame(qres)

# POI Analytics

#### Once integrated POI data has been saved locally, analysis can be performed using tools like **pandas** _DataFrames_, **geopandas** _GeoDataFrames_ or other libraries.

#### Unzip exported CSV file with the results of data integration:

In [None]:
import os
import zipfile

# Extract every member of the exported archive into the output directory
with zipfile.ZipFile('./output/corfu-integrated-pois.zip', 'r') as zip_ref:
    zip_ref.extractall("./output/")

# Give the extracted CSV a descriptive name (assumes the archive contains
# 'points.csv'). os.replace overwrites an existing target on every platform,
# so this cell can be re-run; os.rename raises FileExistsError on Windows
# when the target already exists.
os.replace('./output/points.csv', './output/corfu_pois.csv')

#### Load CSV data in a _DataFrame_:

In [None]:
import pandas as pd

csv_path = './output/corfu_pois.csv'

# Malformed rows are skipped with a warning instead of aborting the load.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0, so prefer
# its replacement `on_bad_lines` and fall back to the legacy keyword only on
# older pandas versions that do not know `on_bad_lines` (TypeError).
try:
    pois = pd.read_csv(csv_path, delimiter='|', on_bad_lines='warn')
except TypeError:
    pois = pd.read_csv(csv_path, delimiter='|', error_bad_lines=False)

# Geometries in the exported CSV file are listed in Extended Well-Known Text (EWKT)
# Since shapely does not support EWKT, drop the SRID prefix by keeping only
# what follows the first ';'. Values without a prefix are left unchanged and
# missing values stay missing instead of raising.
pois['the_geom'] = pois['the_geom'].str.split(';', n=1).str[-1]

pois.head()

#### Create a _GeoDataFrame_:

In [None]:
import geopandas
from shapely import wkt

# Parse every WKT string of the geometry column with shapely
parsed_geometries = pois['the_geom'].apply(wkt.loads)
pois['the_geom'] = parsed_geometries

# Wrap the table in a GeoDataFrame that uses 'the_geom' as its geometry column
gdf = geopandas.GeoDataFrame(pois, geometry='the_geom')

#### Display the location of the exported POIs on a **simplified plot** using _matplotlib_:

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt

# Read the 'naturalearth_lowres' dataset shipped with geopandas.
# NOTE(review): geopandas.datasets is deprecated in newer geopandas releases --
# confirm it is available in the installed version.
naturalearth_path = geopandas.datasets.get_path('naturalearth_lowres')
world = geopandas.read_file(naturalearth_path)

# Restrict the base map to Greece, drawn as a white shape with a black outline
greece = world[world['name'] == 'Greece']
ax = greece.plot(color='white', edgecolor='black')

# Draw the contents of the GeoDataFrame on the same axes in blue
gdf.plot(ax=ax, color='blue')

plt.show()