# Demo 5: Frequent Location Set Mining

In [None]:
# Add the parent directory to the module search path before importing `loci`
# (presumably the package lives one level above this notebook — confirm).
import sys
sys.path.append('..')

# `loci` is imported both as a whole (`lc`) and submodule by submodule; the
# cells below use both spellings (`lc.clustering...` and `plots...`).
import loci as lc
# NOTE: from here on `io` refers to loci's `io` submodule, not the
# standard-library `io` module.
from loci import io
from loci import clustering
from loci import analytics
from loci import plots

## Create a GeoDataFrame from a CSV file containing geolocated posts by users

In [None]:
# Load the geolocated posts from the CSV file into `pois`, handing the
# reader the source and target CRS codes.
pois = io.read_poi_csv(
    input_file='../datasets/flickr-berlin.csv',
    col_name='user_id',
    source_crs='EPSG:4326',
    target_crs='EPSG:3068',
)

# Preview the first rows of the loaded data.
pois.head()

## Cluster posts together to identify main locations

In [None]:
# Cluster the posts using the 'hdbscan' algorithm with min_pts=500.
# The call returns three objects; judging by the names they are bound to,
# these are the clustered posts, the noise posts and the cluster borders.
(pois_in_clusters,
 pois_noise,
 cluster_borders) = clustering.compute_clusters(pois, alg='hdbscan', min_pts=500)

In [None]:
# Preview the first five posts that were assigned to a cluster.
pois_in_clusters.head(n=5)

## Show the clusters

In [None]:
# Draw the cluster borders as a choropleth map, keyed by 'cluster_id' and
# using the 'size' field as the value.
plots.map_choropleth(
    cluster_borders,
    id_field='cluster_id',
    value_field='size',
)

In [None]:
# Mine frequent location sets: the clustered posts are the location visits,
# the cluster borders are the locations, and visits are grouped by 'user_id'.
# `min_sup` and `min_length` presumably set the minimum support and the
# minimum number of locations per set — confirm against the loci docs.
freq_loc = analytics.freq_locationsets(
    location_visits=pois_in_clusters,
    locations=cluster_borders,
    location_id_col='cluster_id',
    locationset_id_col='user_id',
    min_sup=0.015,
    min_length=3,
)

In [None]:
# Report how many frequent location sets were found (one per row of `freq_loc`).
print('Frequent location sets found: ', len(freq_loc.index), sep='')

## Sort results by support

In [None]:
# Show the first five location sets, ordered by descending 'support'.
(
    freq_loc
    .sort_values(by='support', ascending=False)
    .head(n=5)
)

## Sort results by length

In [None]:
# Show the first five location sets, ordered by descending 'length'.
(
    freq_loc
    .sort_values(by='length', ascending=False)
    .head(n=5)
)

## Show selected result on map

In [None]:
# Index label in `freq_loc` of the location set to display. The available
# labels depend on the mining results above, so choose one that actually
# exists in `freq_loc` (a missing label makes `.loc` raise a KeyError).
result_id = 390

# Reproject to WGS84 (EPSG:4326) before plotting the selected geometry.
# `epsg=4326` replaces the deprecated `{'init': 'EPSG:4326'}` dictionary
# form, which current pyproj/GeoPandas releases warn about.
plots.map_geometry(freq_loc.to_crs(epsg=4326).loc[result_id].geometry)