# Demo 4: Assigning Topics to Clusters

In [None]:
# Silence every warning so the notebook output stays readable.
import warnings
warnings.filterwarnings('ignore')

# Add the parent directory to the module search path; presumably this lets
# the `loci` package be imported from a source checkout -- TODO confirm.
import sys
sys.path.append('..')

import pandas as pd
import pyLDAvis

# The loci package and the submodules used by the cells below.
import loci as lc
from loci import io
from loci import clustering
from loci import topics
from loci import plots

## Create a POI GeoDataFrame from a remote CSV file produced by OSMWrangle

In [None]:
# Zipped CSV of POIs for Greece, as produced by OSMWrangle.
remote_file = 'http://download.slipo.eu/results/osm-to-csv/europe/europe_greece-pois.osm.csv.zip'

# Area of interest: Athens, with buffer_dist=10000 (units are not visible
# here; presumably metres -- confirm against loci.io.retrieve_osm_loc).
bound = lc.io.retrieve_osm_loc(
    'Athens, Greece',
    buffer_dist=10000,
)

# Import the POIs, passing the area of interest and the target CRS.
pois = lc.io.import_osmwrangle(
    remote_file,
    bound=bound,
    target_crs='EPSG:2100',
)

# Preview the first rows of the imported POIs.
pois.head()

## Compute clusters

In [None]:
# Cluster the POIs with DBSCAN (min_pts=20, eps=100). The call yields the
# clustered POIs together with the eps value associated with each cluster.
pois_in_clusters, eps_per_cluster = lc.clustering.compute_clusters(
    pois,
    alg='dbscan',
    min_pts=20,
    eps=100,
)

# Alternative using HDBSCAN, kept for reference.
# NOTE(review): this variant unpacks three values while the call above unpacks
# two -- confirm what compute_clusters returns before enabling it.
# pois_in_clusters, pois_noise, cluster_borders = lc.clustering.compute_clusters(pois, alg='hdbscan', min_pts=200)

In [None]:
# Build one border shape per cluster. The second positional argument (2)
# presumably selects the shape-construction method -- confirm against
# loci.clustering.cluster_shapes. The per-cluster eps values come from the
# clustering step.
cluster_borders = lc.clustering.cluster_shapes(pois_in_clusters, 2, eps_per_cluster)

# Draw the cluster borders as a choropleth keyed by 'cluster_id' and shaded by
# the 'size' column. The module is reached through the `lc.` alias, matching
# every other loci call in this notebook.
lc.plots.map_choropleth(cluster_borders, id_field='cluster_id', value_field='size')

## Extract topics from the clusters' keywords and assign topics to clusters

In [None]:
# Run topic modeling over the clustered POIs with 3 topics and 10 keywords per
# topic. The three results are used by the cells below:
#   cluster_topics    -- the topics assigned to each cluster
#   topic_keywords    -- the top keywords of each topic
#   visualized_topics -- a visual depiction of the topics
(
    cluster_topics,
    topic_keywords,
    visualized_topics,
) = lc.topics.topic_modeling(
    pois_in_clusters,
    num_of_topics=3,
    kwds_per_topic=10,
)

## Show the top keywords of each topic

In [None]:
# Display the first rows of the per-topic keyword table.
topic_keywords.head()

## Show a visual depiction of the topics

In [None]:
# Enable pyLDAvis notebook output before displaying the visualization.
pyLDAvis.enable_notebook()
# Last expression in the cell, so the notebook renders it.
visualized_topics

## Show the topics assigned to each cluster

In [None]:
# Display the first rows of the cluster-to-topic assignments.
cluster_topics.head()

## Merge cluster borders and topics

In [None]:
# Attach the topic assignments to the cluster borders. The 'cluster_id' column
# of the borders is matched against the index of cluster_topics; the inner
# join keeps only clusters present on both sides.
cluster_borders_topics = pd.merge(
    cluster_borders,
    cluster_topics,
    left_on='cluster_id',
    right_index=True,
    how='inner',
)

# Preview the merged table.
cluster_borders_topics.head()


## Color clusters based on dominant topic

In [None]:
# Map the clusters, colouring each one by its dominant topic.
lc.plots.map_clusters_with_topics(cluster_borders_topics, viz_type='dominant')

## Color clusters based on a specific topic

In [None]:
# Map the clusters, colouring them according to one chosen topic ('Topic0').
lc.plots.map_clusters_with_topics(
    cluster_borders_topics,
    viz_type='single',
    single_topic='Topic0',
)

## Color clusters with RGB color based on three topics

In [None]:
# Map the clusters with an RGB colour built from three topics: Topic0 feeds
# the red channel, Topic1 the green channel and Topic2 the blue channel.
lc.plots.map_clusters_with_topics(
    cluster_borders_topics,
    viz_type='rgb',
    red='Topic0',
    green='Topic1',
    blue='Topic2',
)