# Setup

To run the example, a file named `secret.py` must be created in the notebooks
folder with the following content:

```python
# Configuration settings

# SLIPO workbench installation
BASE_URL = 'https://app.dev.slipo.eu'

# SLIPO API key
API_KEY = ''
```

The `API_KEY` value must be set to a valid SLIPO Application Key. The file must be imported before creating a new context:

```python
from secret import BASE_URL, API_KEY
```

In [None]:
# Instantiate the SLIPO client from the settings stored in secret.py
from slipoframes.context import SlipoContext

from secret import BASE_URL, API_KEY

# NOTE(review): requires_ssl is False although the sample BASE_URL uses https —
# confirm this is intended for the target installation
ctx = SlipoContext(base_url=BASE_URL, requires_ssl=False, api_key=API_KEY)

# Transform operation

Next we are going to:

* Upload the files `DKV_Fuel_Berlin.csv` and `OSM_Fuel_Berlin.csv` from the local folder `datasets` to the remote folder `notebooks/datasets`. The remote folder will be created automatically if it does not already exist. The option `overwrite` is also set to `True` to overwrite any existing files.
* Upload the contents of folder `config` from the local file system to the remote folder `notebooks/config`.
* Execute two transform operations to convert the `CSV` data to `N-Triples`.
* Check the status of each operation

In [None]:
# Upload the local file DKV_Fuel_Berlin.csv to the remote folder notebooks/datasets;
# overwrite=True replaces any existing remote file
ctx.file_upload('./datasets/DKV_Fuel_Berlin.csv', 'notebooks/datasets/DKV_Fuel_Berlin.csv', overwrite=True)

In [None]:
# Upload the local file OSM_Fuel_Berlin.csv to the remote folder notebooks/datasets;
# overwrite=True replaces any existing remote file
ctx.file_upload('./datasets/OSM_Fuel_Berlin.csv', 'notebooks/datasets/OSM_Fuel_Berlin.csv', overwrite=True)

In [None]:
# Push the contents of the local config folder to the remote folder notebooks/config
ctx.file_upload(
    './config',
    'notebooks/config',
    overwrite=True
)

In [None]:
# List the remote user file system, ordered by size in descending order
df_files = ctx.file_browse(
    sort_col='size',
    sort_asc=False,
    format_size=True
)

# Show only the entries whose path starts with "notebooks"
df_files.loc[df_files['path'].str.startswith("notebooks")]

In [None]:
# Transform the uploaded DKV_Fuel_Berlin.csv file to N-Triples
transform1 = ctx.transform_csv(
    'notebooks/datasets/DKV_Fuel_Berlin.csv',
    # CSV parsing options
    delimiter=';',
    quote='',
    # Input columns holding the POI attributes
    attrKey='ID',
    attrName='name',
    attrCategory='all_tags',
    attrX='lon',
    attrY='lat',
    # Data source label plus the uploaded mapping / classification files
    featureSource='DKV',
    mappingSpec='notebooks/config/DKV_Fuel_Berlin_slipo_mappings.yml',
    classificationSpec='notebooks/config/DKV_POI_sample_classification.csv'
)

In [None]:
# Transform the uploaded OSM_Fuel_Berlin.csv file to N-Triples
transform2 = ctx.transform_csv(
    'notebooks/datasets/OSM_Fuel_Berlin.csv',
    # CSV parsing options
    delimiter='|',
    quote='',
    # Input columns holding the POI attributes
    attrKey='osm_id',
    attrName='name',
    attrCategory='type',
    attrGeometry='wkt',
    attrX='lon',
    attrY='lat',
    # Data source label, profile and the uploaded mapping / classification files
    featureSource='OpenStreetMap',
    profile='OSM_Europe',
    mappingSpec='notebooks/config/OSM_Fuel_Berlin_slipo_mappings.yml',
    classificationSpec='notebooks/config/OSM_POI_sample_classification.csv'
)

In [None]:
# Check process status for transform operations.
# Each handle is rebound to the value returned by process_status, so later cells
# (which call .output() on them) use the refreshed objects.
# Presumably this cell is re-run until both operations have completed — TODO confirm
transform1 = ctx.process_status(transform1)
transform2 = ctx.process_status(transform2)

# Interlink operation

Execute an interlink operation on the RDF datasets generated by the previous two transformation operations

In [None]:
# Interlink the outputs of the two transform operations, passing
# 'SLIPO_equiMatchByNameAndDistance' as the first argument
interlink1 = ctx.interlink('SLIPO_equiMatchByNameAndDistance',
                           left=transform1.output(),
                           right=transform2.output())

In [None]:
# Check process status for interlink operation; the handle is rebound to the
# value returned by process_status so that later cells use the refreshed object
interlink1 = ctx.process_status(interlink1)

# Fuse Operation

Fuse the two RDF datasets generated by operations `transform1` and `transform2` using the links from operation `interlink1`

In [None]:
# Fuse the two transformed datasets using the links produced by interlink1,
# passing 'SLIPO_default_abMode' as the first argument
fuse1 = ctx.fuse('SLIPO_default_abMode',
                 left=transform1.output(),
                 right=transform2.output(),
                 links=interlink1.output())

In [None]:
# Check process status for fuse operation; the handle is rebound to the
# value returned by process_status so that later cells use the refreshed object
fuse1 = ctx.process_status(fuse1)

# Enrich Operation

Enrich the fused RDF dataset from operation `fuse1`

In [None]:
# Enrich the output of the fuse operation.
# NOTE(review): the first argument names Bucharest while the datasets in this
# notebook cover Berlin — confirm that 'SLIPO_TomTom_Bucharest' is the intended value
enrich1 = ctx.enrich('SLIPO_TomTom_Bucharest',
                     source=fuse1.output())

In [None]:
# Check process status for enrich operation; the handle is rebound to the
# value returned by process_status so that later cells use the refreshed object
enrich1 = ctx.process_status(enrich1)

# Export Operation

Export the enriched RDF dataset to a CSV file

In [None]:
# Export the output of the enrich operation to CSV, using '|' as the delimiter
# and '"' as the quote character
export1 = ctx.export_csv('SLIPO_default',
                         enrich1.output(),
                         delimiter='|',
                         quote='"')

In [None]:
# Check process status for export operation; the handle is rebound to the
# value returned by process_status so that the download cell uses the refreshed object
export1 = ctx.process_status(export1)

In [None]:
# Download the export output to ./output/exported-data.zip on the local file
# system, replacing an existing local file (overwrite=True)
ctx.process_file_download(
    export1.output(),
    target='./output/exported-data.zip',
    overwrite=True
)

# Execute an existing workflow

Run a prespecified data integration workflow that involves all stages (transformation, interlinking, fusion, enrichment, export).

Identify the workflow, its various versions and their executions:

In [None]:
# Query the workflow by name. The positional integers 0 and 10 are presumably
# the page index and page size — TODO confirm against the client API
processes = ctx.process_query('Integrate OSM & DKV data in Berlin (updated)', 0, 10)

# Display only the identifying columns of the query result
processes[['Id', 'Name', 'Executed On', 'Version']]

Execute a new version of this workflow:

In [None]:
# Start the workflow identified by the hard-coded pair (352, 8) — presumably the
# `Id` and `Version` values listed by the query above; TODO replace with your own values
ctx.process_start(352, 8)


Inspect the status of this workflow execution:

In [None]:
# Fetch the status for the same hard-coded pair (352, 8) used to start the workflow
workflow1 = ctx.process_status(352, 8)

Render this workflow as a graph with all its components:

In [None]:
# Render the workflow held in workflow1; as the last expression of the cell,
# the returned value is displayed by the notebook
ctx.process_render(workflow1)

# POI Data Analytics

Once integrated POI data has been saved locally, analysis can be performed using
tools like pandas DataFrames, geopandas GeoDataFrames or other libraries.

In [None]:
# Unzip exported CSV file with the results of data integration
import os
import zipfile

# Extract every member of the downloaded archive into the output folder
with zipfile.ZipFile('./output/exported-data.zip', 'r') as zip_ref:
    zip_ref.extractall("./output/")

# Give the extracted file a descriptive name. os.replace overwrites an existing
# target on every platform, so this cell can be re-run; os.rename raises
# FileExistsError on Windows when Fuel_Berlin.csv is already present.
os.replace('./output/points.csv', './output/Fuel_Berlin.csv')

In [None]:
# Load CSV data in a DataFrame
import pandas as pd

# Skip malformed rows instead of aborting. `on_bad_lines='skip'` replaces the
# `error_bad_lines=False` flag, which was deprecated in pandas 1.3 and removed in 2.0.
pois = pd.read_csv('./output/Fuel_Berlin.csv', delimiter='|', on_bad_lines='skip')

# Geometries in the exported CSV file are listed in Extended Well-Known Text (EWKT),
# i.e. "SRID=<code>;<WKT>". Since shapely does not support EWKT, drop the SRID prefix.
# Taking the last piece of a single split leaves a value without an SRID prefix
# unchanged instead of raising IndexError.
pois['the_geom'] = pois['the_geom'].apply(lambda x: x.split(';', 1)[-1])

pois.head()

In [None]:
# Build a GeoDataFrame from the loaded POIs
import geopandas
from shapely import wkt

# Parse every WKT string of the geometry column into a shapely object (in place)
pois['the_geom'] = pois['the_geom'].map(wkt.loads)

# Use the parsed column as the geometry of the new frame; no CRS is passed here
gdf = geopandas.GeoDataFrame(pois, geometry='the_geom')

In [None]:
# Display the location of the exported POIs on a simple plot using matplotlib
# (the IPython magic below renders figures inline in the notebook)
%matplotlib inline

import matplotlib.pyplot as plt

# Load the low-resolution Natural Earth country polygons bundled with geopandas.
# NOTE(review): `geopandas.datasets.get_path` was deprecated in geopandas 0.14 and
# removed in 1.0 — confirm the pinned geopandas version, or load the Natural Earth
# data from a local file instead
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

# Restrict focus to Germany:
ax = world[world.name == 'Germany'].plot(
    color='white', edgecolor='black')

# Plot the contents of the GeoDataFrame in blue dots:
gdf.plot(ax=ax, color='blue')

plt.show()

# POI Data Analytics using LOCI

Perform spatial analytics over the integrated POI data.

In [None]:
# LOCI dependencies:
import loci as lc
from loci import io
from loci import analytics
from loci import plots

In [None]:
# Load the integrated POI dataset through LOCI
pois = lc.io.read_poi_csv(
    input_file='./output/Fuel_Berlin.csv',
    col_id='id',
    col_name='uri',
    col_lon='lon',
    col_lat='lat',
    col_kwds='name',
    col_sep='|',
    kwds_sep=',',
    source_crs='EPSG:4326',
    target_crs='EPSG:4326',
    keep_other_cols=False
)

# Upper-case every element of each 'name' value so comparisons ignore letter case
pois['name'] = pois['name'].apply(lambda names: [entry.upper() for entry in names])

pois.head(10)

Utilize the name of the various brands as keywords for spatial analytics:

In [None]:
# Relabel both columns in one pass: 'name' (the brand names) becomes 'kwds',
# and 'uri' takes over the 'name' label
pois.rename(columns={'name': 'kwds', 'uri': 'name'}, inplace=True)

Draw locations on map:

In [None]:
# Build a map of the POIs with show_bbox=True, then display it as the cell output
m = lc.plots.map_points(pois, show_bbox=True)
m

Statistics on the number of fuel stations per brand name

In [None]:
# Compute keyword frequencies over the POIs, then display them as the cell output
kf = lc.analytics.kwds_freq(pois)
kf

Chart showing fuel stations per brand name:

In [None]:
# Bar chart of the keyword frequencies computed in kf
lc.plots.barchart(
    kf,
    plot_title='Top Keywords',
    x_axis_label='Keywords',
    y_axis_label='Frequency'
)

Word cloud of the various brands in the dataset:

In [None]:
# Draw a word cloud from the POIs (presumably built from the 'kwds' column — TODO confirm)
lc.plots.plot_wordcloud(pois)

Heatmap of the fuel stations belonging to a particular brand:

In [None]:
# Keep only the POIs matching the keyword 'TOTAL'; the keyword is upper-case
# because the names were upper-cased when the dataset was loaded
pois_filtered = lc.analytics.filter_by_kwd(pois, 'TOTAL')
# Draw a heatmap of the filtered POIs with radius=12
lc.plots.heatmap(pois_filtered, radius=12)