# Collective movement

### Using `EvolvingClustersKDT.py` from the `DataStories-UniPi/EvolvingClusters` repository

In [1]:
from EvolvingClustersKDT import evolving_clusters

### Imports

In [2]:
from sklearn.cluster import KMeans
from matplotlib import pyplot as plt
from sqlalchemy import create_engine
import geopandas as gpd
import numpy as np
import psycopg2

from config import DB_CONFIG
from preprocess import cleanse

### Establish a database connection to PostGIS, load the points, and cleanse the data

In [13]:
# Load the first RECORDS_LIMIT rows of `vessels_points_jan_new` (ordered by
# `timest`) from PostGIS into a GeoDataFrame, then run the project's `cleanse`
# helper on them.
RECORDS_LIMIT = 50000

all_points_sql = f'SELECT * FROM vessels_points_jan_new ORDER BY timest ASC LIMIT {RECORDS_LIMIT}'

# Close the connection explicitly once the query result is in memory:
# psycopg2's `with connection:` block only ends the transaction, it does not
# release the connection, so try/finally is used instead.
conn = psycopg2.connect(**DB_CONFIG)
try:
    all_points_gdf = gpd.GeoDataFrame.from_postgis(all_points_sql, conn, geom_col='geom')
finally:
    conn.close()

# `cleanse` comes from the local `preprocess` module; its exact behaviour is
# not visible in this notebook.
gdf = cleanse(all_points_gdf)

# Give the geometry column the conventional name used by the cells below.
# NOTE(review): `GeoDataFrame.rename_geometry('geometry')` is the dedicated API
# for this; kept as a plain column rename to preserve current behaviour.
gdf.rename(columns={'geom':'geometry'},inplace=True)

gdf.head()

Unnamed: 0,timest,mmsi,heading,turn,speed,course,geometry,dates,acceleration
0,1514758000000.0,235100119,38.518298,,0.000225,259.6,POINT (23.68197 37.93270),2017-12-31 23:00:00,4.539625e-09
1,1514758000000.0,237955000,36.015272,,8e-05,340.4,POINT (23.64043 37.94767),2017-12-31 23:00:01,2.691549e-09
2,1514758000000.0,241460000,314.136888,,4.4e-05,231.8,POINT (23.68368 37.93051),2017-12-31 23:00:02,1.498856e-10
3,1514758000000.0,235089525,287.544879,,2.2e-05,0.0,POINT (23.64809 37.93104),2017-12-31 23:00:02,9.175846e-11
4,1514758000000.0,240920000,62.838354,,0.00032,265.1,POINT (23.55782 37.92538),2017-12-31 23:00:02,1.525813e-08


In [14]:
# Declare the stored coordinates as EPSG:4326, then reproject to EPSG:2100
# (GGRS87 / Greek Grid) so that coordinates are expressed in metres.
gdf = gdf.set_crs(epsg=4326).to_crs(epsg=2100)

# Show the resulting CRS as a sanity check.
gdf.crs

<Projected CRS: EPSG:2100>
Name: GGRS87 / Greek Grid
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Greece - onshore.
- bounds: (19.57, 34.88, 28.3, 41.75)
Coordinate Operation:
- name: Greek Grid
- method: Transverse Mercator
Datum: Greek Geodetic Reference System 1987
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [15]:
# Preview the first five rows after reprojection (geometry is now in EPSG:2100).
gdf.head(n=5)

Unnamed: 0,timest,mmsi,heading,turn,speed,course,geometry,dates,acceleration
0,1514758000000.0,235100119,38.518298,,0.000225,259.6,POINT (471902.584 4198107.534),2017-12-31 23:00:00,4.539625e-09
1,1514758000000.0,237955000,36.015272,,8e-05,340.4,POINT (468258.234 4199781.756),2017-12-31 23:00:01,2.691549e-09
2,1514758000000.0,241460000,314.136888,,4.4e-05,231.8,POINT (472051.593 4197864.410),2017-12-31 23:00:02,1.498856e-10
3,1514758000000.0,235089525,287.544879,,2.2e-05,0.0,POINT (468924.571 4197933.869),2017-12-31 23:00:02,9.175846e-11
4,1514758000000.0,240920000,62.838354,,0.00032,265.1,POINT (460988.335 4197340.413),2017-12-31 23:00:02,1.525813e-08


In [17]:
# Preview the first five rows again.
# NOTE(review): the recorded output of this cell already lists `lon`/`lat`
# columns, which are only assigned in the following cell -- it was likely
# produced by out-of-order execution or a since-removed cell. Confirm with
# Restart Kernel -> Run All.
gdf.head(n=5)

Unnamed: 0,timest,mmsi,heading,turn,speed,course,geometry,dates,acceleration,lon,lat
0,1514758000000.0,235100119,38.518298,,0.000225,259.6,POINT (471902.584 4198107.534),2017-12-31 23:00:00,4.539625e-09,471902.583944,4198108.0
1,1514758000000.0,237955000,36.015272,,8e-05,340.4,POINT (468258.234 4199781.756),2017-12-31 23:00:01,2.691549e-09,468258.234221,4199782.0
2,1514758000000.0,241460000,314.136888,,4.4e-05,231.8,POINT (472051.593 4197864.410),2017-12-31 23:00:02,1.498856e-10,472051.593049,4197864.0
3,1514758000000.0,235089525,287.544879,,2.2e-05,0.0,POINT (468924.571 4197933.869),2017-12-31 23:00:02,9.175846e-11,468924.570707,4197934.0
4,1514758000000.0,240920000,62.838354,,0.00032,265.1,POINT (460988.335 4197340.413),2017-12-31 23:00:02,1.525813e-08,460988.334939,4197340.0


In [18]:
# Expose the point coordinates as plain numeric columns.
# Despite the names, `lon`/`lat` hold the x/y of the reprojected (EPSG:2100)
# geometry, i.e. easting/northing in metres, not degrees. The names are kept
# because they are presumably what `evolving_clusters` looks for -- confirm
# against EvolvingClustersKDT.
# Vectorised GeoSeries accessors replace the row-by-row `.apply(lambda ...)`.
gdf['lon'] = gdf['geometry'].x
gdf['lat'] = gdf['geometry'].y

# Scale `timest` down by 1000 (presumably epoch milliseconds -> seconds).
# WARNING: this overwrites the column in place, so re-running this cell
# divides the values again; re-run the loading cell first if needed.
gdf['timest'] = gdf['timest'] / 1000.0

gdf = gdf.sort_values(by='timest', ascending=True)

# (A bare `gdf.groupby('dates')` used to sit here; its result was discarded,
# so the statement had no effect and has been removed.)

gdf.head()

Unnamed: 0,timest,mmsi,heading,turn,speed,course,geometry,dates,acceleration,lon,lat
0,1514758000.0,235100119,38.518298,,0.000225,259.6,POINT (471902.584 4198107.534),2017-12-31 23:00:00,4.539625e-09,471902.583944,4198108.0
1,1514758000.0,237955000,36.015272,,8e-05,340.4,POINT (468258.234 4199781.756),2017-12-31 23:00:01,2.691549e-09,468258.234221,4199782.0
2,1514758000.0,241460000,314.136888,,4.4e-05,231.8,POINT (472051.593 4197864.410),2017-12-31 23:00:02,1.498856e-10,472051.593049,4197864.0
3,1514758000.0,235089525,287.544879,,2.2e-05,0.0,POINT (468924.571 4197933.869),2017-12-31 23:00:02,9.175846e-11,468924.570707,4197934.0
4,1514758000.0,240920000,62.838354,,0.00032,265.1,POINT (460988.335 4197340.413),2017-12-31 23:00:02,1.525813e-08,460988.334939,4197340.0


In [19]:
# Run the evolving-clusters discovery on the prepared frame, using `timest`
# as the temporal column and showing the progress bar.
# NOTE(review): the units of `distance_threshold` / `time_threshold` are
# defined by EvolvingClustersKDT and are not visible here -- presumably the
# distance is in the units of the `lon`/`lat` columns (metres after the
# reprojection); confirm.
# NOTE(review): the recorded run iterated over 22726 steps and returned two
# empty frames; the timestamps may need temporal alignment/resampling before
# enough vessels share a time slice -- verify.
res_mcs, res_mc = evolving_clusters(
    gdf,
    temporal_name='timest',
    distance_threshold=3500,
    min_cardinality=5,
    time_threshold=2,
    disable_progress_bar=False,
)

100%|████████████████████████████████████████████████████████████████████████████| 22726/22726 [07:22<00:00, 51.35it/s]


In [20]:
# Print both result frames as plain text, `res_mc` first, then `res_mcs`.
for cluster_frame in (res_mc, res_mcs):
    print(cluster_frame)

Empty DataFrame
Columns: [clusters, st, et]
Index: []
Empty DataFrame
Columns: [clusters, st, et]
Index: []
