# Setup

### Full documentation about LOCI is available [here](https://slipo-eu.github.io/loci/).

In [None]:
import pandas as pd
import folium

import warnings
warnings.filterwarnings('ignore')

# LOCI dependencies:
import loci as lc
from loci import io
from loci import analytics
from loci import clustering
from loci import plots
from loci import topics
from loci import index

import cluster_diff
from cluster_diff import map_cluster_diff

# Cap the displayed column width so long WKT strings are truncated in data frames
pd.options.display.max_colwidth = 100

# POI Data Analytics using LOCI

#### Perform **spatial analytics** over the integrated POI data.

#### Create a _LOCI dataframe_ from the integrated POI dataset:

In [None]:
# Load the integrated POIs; the generic category column supplies the keywords
pois = lc.io.read_poi_csv(
    input_file='./output/corfu_pois.csv',
    col_id='id',
    col_name='name',
    col_lon='lon',
    col_lat='lat',
    col_kwds='generic_category',
    col_sep='|',
    kwds_sep=',',
    source_crs='EPSG:4326',
    target_crs='EPSG:4326',
    keep_other_cols=False,
)

# Expose the category under the 'kwds' column name for the spatial analytics
pois.rename(columns={'generic_category': 'kwds'}, inplace=True)

# Uppercase every name and every category keyword to ease comparison
pois['name'] = pois['name'].map(lambda poi_name: poi_name.upper())
pois['kwds'] = pois['kwds'].map(lambda kwds: [kwd.upper() for kwd in kwds])

# Show the first ten rows
pois.head(10)

#### Draw locations on **map**:

In [None]:
# Plot the POIs on a map; show_bbox=True presumably also draws the dataset's bounding box (confirm in the LOCI docs)
m = lc.plots.map_points(pois, show_bbox=True)
# Bare expression as the last statement so the notebook displays the map
m

#### Create a **grid** of fixed-size cells and calculate **density**, i.e., the number of POIs inside each cell:

In [None]:
# Build the grid over the POIs; the call returns the cells plus the grid dimensions
g, num_columns, num_rows = lc.index.grid(pois, cell_size_ratio=0.05)
# Report the grid size as columns x rows
print('Created ' + str(num_columns) + ' x ' + str(num_rows) + ' grid.')
# Show the first grid cells
g.head()

#### Visualize this grid on map:

In [None]:
# Silence warnings before rendering (same filter as the one installed in the setup cell)
warnings.filterwarnings('ignore')
# Choropleth of the grid: areas identified by 'cell_id', with 'score' as the plotted value
lc.plots.map_choropleth(areas=g, id_field='cell_id', value_field='score')

# Keyword statistics with LOCI

#### Statistics on the _number of POIs per category_:

In [None]:
# Keyword frequencies over the POIs (the keywords here are the uppercased categories)
kf = lc.analytics.kwds_freq(pois)
# Display the result in the notebook
kf

#### **Chart** showing number of POIs per category:

In [None]:
# Bar chart of the keyword frequencies held in `kf`
lc.plots.barchart(kf, plot_title='Top Keywords', x_axis_label='Keywords', y_axis_label='Frequency')

#### **Word cloud** of the various categories in the dataset:

In [None]:
# Word cloud for the integrated POIs (presumably built from their 'kwds' column; confirm in the LOCI docs)
lc.plots.plot_wordcloud(pois)

# Map-based analytics with LOCI

#### **Heatmap** of a particular category (_accommodation_) against the integrated POIs:

In [None]:
# Keep only the POIs tagged ACCOMMODATION and render them as a heatmap
pois_filtered = lc.analytics.filter_by_kwd(pois, kwd_filter='ACCOMMODATION')
lc.plots.heatmap(pois_filtered, radius=12)

#### Load **original** dataset for comparison with the integrated results

In [None]:
# Read the original POI dataset; here the keywords come from the 'category' column
pois_original = lc.io.read_poi_csv(input_file='./datasets/osm20_pois_corfu.csv',
                       col_id='id',
                       col_name='name',
                       col_lon='lon',
                       col_lat='lat',
                       col_kwds='category',
                       col_sep='|',
                       kwds_sep=',',
                       source_crs='EPSG:4326',
                       target_crs='EPSG:4326',
                       keep_other_cols=False)

# Expose the categories under the 'kwds' column name
pois_original.rename(columns={'category': 'kwds'}, inplace=True)

# Word cloud of the original dataset, for comparison with the integrated one
lc.plots.plot_wordcloud(pois_original)

#### **Filter** POIs belonging to a specific category (e.g., _FOOD_):

In [None]:
# Original data: keep the POIs matching the FOOD keyword
pois_original = lc.analytics.filter_by_kwd(pois_original, kwd_filter='FOOD')

# Final (integrated) data: keep the POIs matching the EAT/DRINK keyword
pois_final = lc.analytics.filter_by_kwd(pois, kwd_filter='EAT/DRINK')

#### Create **Areas of Interest** (AOI) using _clustering_ with the DBSCAN algorithm:

In [None]:
# DBSCAN settings: algorithm name, minimum points per cluster, and eps
alg, min_pts, eps = 'dbscan', 10, 0.01

#### Apply _clustering_ separately to each POI dataset:

In [None]:
# Original POIs: cluster them, then conflate the clusters into polygons
aois_original, eps_original = lc.clustering.compute_clusters(pois_original, alg=alg, min_pts=min_pts, eps=eps, n_jobs=-1)
aois_original = lc.clustering.cluster_shapes(aois_original, 2, eps_original)

# Integrated POIs: the same two steps with identical parameters
aois_final, eps_final = lc.clustering.compute_clusters(pois_final, alg=alg, min_pts=min_pts, eps=eps, n_jobs=-1)
aois_final = lc.clustering.cluster_shapes(aois_final, 2, eps_final)

#### Display **AOIs for the original POIs**

In [None]:
# Choropleth of the AOIs found in the original POIs, using the 'size' column as the value
lc.plots.map_choropleth(areas=aois_original, id_field='cluster_id', value_field='size')

#### Display **AOIs for the integrated POIs**

In [None]:
# Choropleth of the AOIs found in the integrated POIs, using the 'size' column as the value
lc.plots.map_choropleth(areas=aois_final, id_field='cluster_id', value_field='size')

#### Compute **geometric difference** between AOIs:

In [None]:
# Compare the integrated AOIs against the original ones; map_cluster_diff comes from the local
# cluster_diff module (see that module for the exact columns of the result)
gdf_diff = map_cluster_diff(aois_final, aois_original)

#### Plot the geometric differences on **map** with specific symbols and tooltips:

In [None]:
# Tooltip shows every column except the geometry and, when present, the style
fields = list(gdf_diff.columns.values)
fields.remove('geometry')
try:
    fields.remove('style')
except ValueError:
    # No 'style' column to hide
    pass
tooltip = folium.features.GeoJsonTooltip(fields=fields)

# Map styling
def style_function(row):
    """Return the value stored under ``row['properties']['style']``.

    Passed to ``folium.GeoJson`` as its ``style_function``, so ``row`` is
    expected to be a mapping with a ``'properties'`` entry.
    """
    properties = row['properties']
    return properties['style']

# Base map built from the difference geometries on OpenStreetMap tiles
m = lc.plots.map_geometries(gdf_diff, tiles='OpenStreetMap', width='100%', height='100%')

# Overlay the differences as a GeoJSON layer, styled by style_function and carrying the tooltip
folium.GeoJson(
    gdf_diff,
    style_function=style_function,
    tooltip=tooltip
).add_to(m)
# Bare expression as the last statement so the notebook displays the map
m

# Topic Modeling

#### Create a _LOCI dataframe_ from the **integrated** POI dataset:

In [None]:
# Reload the integrated POIs, this time taking the keywords from the 'category' column.
# The 'id' column is the identifier and 'name' the display name, as in the first load of this file.
pois = lc.io.read_poi_csv(input_file='./output/corfu_pois.csv',
                       col_id='id',
                       col_name='name',
                       col_lon='lon',
                       col_lat='lat',
                       col_kwds='category',
                       col_sep='|',
                       kwds_sep=',',
                       source_crs='EPSG:4326',
                       target_crs='EPSG:4326',
                       keep_other_cols=False)
# Names are plain strings: uppercase each one as a whole (iterating over it would split it into characters)
pois['name'] = pois['name'].apply(lambda x: x.upper())
# Each category entry is a list of keywords: uppercase every element
pois['category'] = pois['category'].apply(lambda x: [element.upper() for element in x])
# Expose the categories under the 'kwds' column name
pois.rename(columns={'category': 'kwds'}, inplace=True)

#### Apply _DBSCAN_ algorithm to find **spatial clusters** of POIs:

In [None]:
# Cluster the POIs with DBSCAN (min_pts=10, eps=0.005); the second return value is handed on to cluster_shapes
pois_in_clusters, eps_per_cluster = lc.clustering.compute_clusters(pois, alg='dbscan', min_pts=10, eps=0.005)
# Derive the cluster shapes (the meaning of the positional argument 2 is defined by LOCI; TODO confirm)
cluster_borders = lc.clustering.cluster_shapes(pois_in_clusters, 2, eps_per_cluster)

In [None]:
# Draw the cluster shapes on a map
lc.plots.map_geometries(cluster_borders)

#### Model clusters as documents, extract **3 topics**, and assign topics to clusters:

In [None]:
# Run LOCI topic modeling over the clustered POIs, requesting 3 topics with 10 keywords per topic.
# Three objects come back; judging by the names: per-cluster topics, per-topic keywords and a visualization (confirm in the LOCI docs)
cluster_topics, topic_keywords, visualized_topics = lc.topics.topic_modeling(pois_in_clusters, num_of_topics=3, kwds_per_topic=10)

In [None]:
# Show the first rows of the topic keywords
topic_keywords.head()

#### Merge clusters with their topic assignments:

In [None]:
# Inner join: cluster_borders' 'cluster_id' column is matched against the index of cluster_topics,
# so only clusters present on both sides are kept
cluster_borders_topics = pd.merge(cluster_borders, cluster_topics, left_on='cluster_id', right_index=True, how='inner')
# Show the first merged rows
cluster_borders_topics.head()

#### Plot each cluster with an **RGB color** signifying its adherence to each of the identified topics:

In [None]:
# Define map styling
def style_gen_mixed(row):
    """Build a style dict whose fill colour mixes the three topic values.

    ``row['properties']`` must provide ``Topic0``, ``Topic1`` and ``Topic2``.
    Each value is multiplied by 255, rounded, and written as a two-digit hex
    channel (red, green, blue in that order). The values are presumably
    weights in [0, 1]; nothing here enforces that.

    Returns a dict with the keys ``fillColor``, ``weight``, ``color`` and
    ``fillOpacity``.
    """
    props = row['properties']
    channels = [round(props[topic] * 255) for topic in ('Topic0', 'Topic1', 'Topic2')]
    color = '#' + ''.join(format(channel, '02x') for channel in channels)
    return {'fillColor': color, 'weight': 2, 'color': 'black', 'fillOpacity': 0.8}

# Tooltip shows every column except the geometry and, when present, the style
fields = list(cluster_borders_topics.columns.values)
fields.remove('geometry')
try:
    fields.remove('style')
except ValueError:
    # No 'style' column to hide
    pass
tooltip = folium.features.GeoJsonTooltip(fields=fields)

# Base map built from the plain cluster shapes on OpenStreetMap tiles
m = lc.plots.map_geometries(cluster_borders, tiles='OpenStreetMap', width='100%', height='100%')

# Overlay the clusters with their topic values as a GeoJSON layer, coloured by style_gen_mixed and carrying the tooltip
folium.GeoJson(
    cluster_borders_topics,
    style_function=style_gen_mixed,
    tooltip=tooltip
).add_to(m)
# Bare expression as the last statement so the notebook displays the map
m