# Demo 5: Frequent Location Set Mining

In [15]:
# Suppress every warning for the rest of the session so the demo output stays
# uncluttered. Note that this also hides deprecation notices from dependencies.
import warnings
warnings.filterwarnings('ignore')

# Put the parent directory on the module search path
# (presumably so `loci` is importable from a source checkout; confirm the repo layout).
import sys
sys.path.append('..')

# IPython autoreload, mode 2: re-import all modules before each cell executes,
# so edits to the library source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# `lc` is the package alias; the submodules are also imported by name.
# Note that `io` here is loci's module and shadows the standard-library name `io`.
import loci as lc
from loci import io
from loci import clustering
from loci import analytics
from loci import plots

## Create a GeoDataFrame from a CSV file containing geolocated posts by users

In [2]:
# Load the Flickr posts for Berlin; `user_id` is passed as the name column,
# and the CRS arguments request EPSG:4326 input and EPSG:3068 output.
pois = io.read_poi_csv(
    input_file='../datasets/flickr-berlin.csv',
    col_name='user_id',
    source_crs='EPSG:4326',
    target_crs='EPSG:3068',
)
pois.head()

Loaded 316281 POIs.


Unnamed: 0,id,user_id,geometry
0,0,74888386@N00,POINT (22948.096 22427.465)
1,1,31217431@N02,POINT (20143.164 19693.263)
2,2,22530195@N05,POINT (3574.476 9719.844)
3,3,11946969@N00,POINT (24287.461 21084.614)
4,4,67499195@N00,POINT (24351.985 19422.905)


## Cluster posts together to identify main locations

In [3]:
# Cluster the posts with HDBSCAN (min_pts=200). The call returns the clustered
# POIs together with a per-cluster eps value that the shape step consumes.
pois_in_clusters, eps_per_cluster = lc.clustering.compute_clusters(
    pois,
    alg='hdbscan',
    min_pts=200,
)

Done in 23.553s.
Number of clusters: 290
Number of clustered POIs: 168511
Number of outlier POIs: 147770


In [6]:
# Derive one border geometry per cluster and draw the clusters coloured by size.
# NOTE(review): the positional `1` presumably selects the shape type; confirm
# against the `cluster_shapes` signature and consider passing it by keyword.
cluster_borders = lc.clustering.cluster_shapes(
    pois_in_clusters,
    1,
    eps_per_cluster,
)
plots.map_choropleth(
    cluster_borders,
    id_field='cluster_id',
    value_field='size',
)

Done in 1.232s.


## Find frequent location sets

In [7]:
# Mine location sets of at least 3 clusters with minimum support 0.01.
# Visits are grouped by `user_id`; locations are identified by `cluster_id`.
freq_loc = lc.analytics.freq_locationsets(
    location_visits=pois_in_clusters,
    locations=cluster_borders,
    location_id_col='cluster_id',
    locationset_id_col='user_id',
    min_sup=0.01,
    min_length=3,
)

In [8]:
# Report how many frequent location sets were mined.
print(f'Frequent location sets found: {len(freq_loc.index)}')

Frequent location sets found: 87


## Sort results by support

In [9]:
# Show the five location sets with the highest support.
freq_loc.sort_values('support', ascending=False).head(5)

Unnamed: 0,support,location_ids,length,geometry
336,0.015387,"(115, 116, 197)",3,GEOMETRYCOLLECTION (POLYGON ((21355.045 20683....
418,0.014513,"(288, 249, 265)",3,GEOMETRYCOLLECTION (POLYGON ((23186.009 21041....
396,0.014338,"(197, 193, 249)",3,GEOMETRYCOLLECTION (POLYGON ((24049.071 19908....
412,0.014163,"(249, 203, 265)",3,GEOMETRYCOLLECTION (POLYGON ((23196.800 20745....
361,0.014163,"(193, 116, 197)",3,GEOMETRYCOLLECTION (POLYGON ((23516.626 19940....


## Sort results by length

In [10]:
# Show the five longest location sets.
# NOTE(review): every row displayed has length 3, so the order among ties is
# whatever the sort happens to yield; consider a secondary key such as 'support'.
freq_loc.sort_values('length', ascending=False).head(5)

Unnamed: 0,support,location_ids,length,geometry
334,0.013114,"(115, 116, 190)",3,GEOMETRYCOLLECTION (POLYGON ((21355.045 20683....
389,0.010316,"(288, 197, 190)",3,GEOMETRYCOLLECTION (POLYGON ((23186.009 21041....
397,0.010841,"(193, 203, 249)",3,GEOMETRYCOLLECTION (POLYGON ((23516.626 19940....
396,0.014338,"(197, 193, 249)",3,GEOMETRYCOLLECTION (POLYGON ((24049.071 19908....
395,0.010316,"(193, 244, 197)",3,GEOMETRYCOLLECTION (POLYGON ((23516.626 19940....


## Show selected result on map

In [23]:
# Index label (not position) of the frequent location set to display;
# 336 is the top row when the results are sorted by support.
result_id = 336

# Select the single row first, then reproject only that row to EPSG:4326 before
# handing its geometry to the map helper. `epsg=4326` replaces the deprecated
# `{'init': 'EPSG:4326'}` dict, whose FutureWarning is hidden by the blanket
# warning filter set at the top of the notebook.
selected = freq_loc.loc[[result_id]].to_crs(epsg=4326)
lc.plots.map_geometry(selected.geometry.iloc[0])