# Vessel Hotspots

In [1]:
from urllib.parse import quote

import geopandas as gpd
import numpy as np
from geoalchemy2 import Geometry
from matplotlib import pyplot as plt
from shapely.geometry import Point
from sklearn.cluster import KMeans
from sqlalchemy import create_engine

from config import DB_CONFIG
from preprocess import cleanse

### DB Connection Setup

In [2]:
# PostgreSQL port; every other connection setting is read from config.DB_CONFIG.
DB_PORT = 5432

# Percent-encode the credentials before embedding them in the URL so that
# characters with a special meaning there ('@', ':', '/', '#', ...) cannot
# corrupt the connection string. safe='' makes quote() encode '/' as well.
db_user = quote(str(DB_CONFIG["user"]), safe='')
db_password = quote(str(DB_CONFIG["password"]), safe='')
engine = create_engine(
    f'postgresql://{db_user}:{db_password}@{DB_CONFIG["host"]}:{DB_PORT}/{DB_CONFIG["database"]}'
)

# Upper bound on the number of rows (point records) fetched by the query below;
# despite the name it is applied as a row LIMIT, not as a count of distinct vessels.
NUM_OF_VESSELS = 500000

# Select the point records whose x or y coordinate (after transforming to
# EPSG:4326) is non-zero, i.e. drop only the points lying exactly at (0, 0).
# NOTE(review): LIMIT is used without ORDER BY, so which rows come back is not
# guaranteed to be the same from run to run - add an ORDER BY if the clustering
# must be reproducible.
traj_sql = f'''
SELECT timest, dates, mmsi, heading, speed, course, geom FROM vessels_points_jan
WHERE ST_X(ST_Centroid(ST_Transform(geom, 4326))) != 0 
OR ST_Y(ST_Centroid(ST_Transform(geom, 4326))) != 0 LIMIT {NUM_OF_VESSELS};
'''

### Read the dataset from PostGIS

In [3]:
# Run the trajectory query and load the result as a GeoDataFrame whose
# geometry is taken from the `geom` column.
gdf = gpd.GeoDataFrame.from_postgis(
    sql=traj_sql,
    con=engine,
    geom_col='geom',
)

### Cleanse the dataset

In [4]:
# Pass the loaded points through the project's preprocessing step
# (`cleanse`, imported from preprocess).
# The result is bound back to `gdf`, so re-running this cell on its own feeds
# the already-cleansed frame through `cleanse` a second time.
# NOTE(review): the clustering cell reads `gdf.geom` afterwards, so this assumes
# `cleanse` keeps the `geom` geometry column - confirm.
gdf = cleanse(gdf)

### KMeans clustering

In [5]:
# Build the (n_points, 2) feature matrix from the point coordinates:
# column 0 holds x, column 1 holds y.
# NOTE(review): KMeans measures plain Euclidean distance on these values; if
# they are longitude/latitude in degrees the two axes are not on the same
# metric scale - confirm this is acceptable or project to a metric CRS first.
X = np.column_stack((gdf.geom.x, gdf.geom.y))

# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in version 1.4, so leaving it implicit makes the resulting hotspots
# depend on the installed version (and emits a FutureWarning on 1.2/1.3).
# random_state fixes the centroid initialisation for reproducible labels.
kmeans = KMeans(n_clusters=7, n_init=10, max_iter=400, random_state=5)

# Fit the model and get, for every row of X, the index of its cluster.
y_kmeans = kmeans.fit_predict(X)

### Dataframe with points and clusters

In [6]:
# Pair every clustered coordinate with the hotspot label KMeans assigned to it.
# - points_from_xy builds all geometries in one vectorised call instead of
#   constructing one shapely Point per row through map/lambda.
# - The CRS is taken from `gdf`, because X holds gdf's own coordinates; a frame
#   without a CRS is later written to PostGIS with no SRID.
clustered_points = gpd.GeoDataFrame(
    {
        'geometry': gpd.points_from_xy(X[:, 0], X[:, 1]),
        'hotspot': y_kmeans,
    },
    crs=gdf.crs,
)
clustered_points.head()

Unnamed: 0,geometry,hotspot
0,POINT (23.68041 37.93103),5
1,POINT (23.68041 37.93104),5
2,POINT (23.68041 37.93104),5
3,POINT (23.63999 37.94265),2
4,POINT (23.68041 37.93104),5


### Cluster centers

In [7]:
# One row per cluster: row i of cluster_centers_ is the centroid of the cluster
# labelled i, so the `hotspot` id is simply the row index.
# The CRS is taken from `gdf`, whose coordinates the model was fitted on; a
# frame without a CRS is later written to PostGIS with no SRID.
hotspot_centers = kmeans.cluster_centers_
hotspots_gdf = gpd.GeoDataFrame(
    {
        'hotspot': range(kmeans.n_clusters),
        'geometry': gpd.points_from_xy(hotspot_centers[:, 0], hotspot_centers[:, 1]),
    },
    crs=gdf.crs,
)
hotspots_gdf.head()

Unnamed: 0,hotspot,geometry
0,0,POINT (23.59464 37.90098)
1,1,POINT (23.55553 37.95431)
2,2,POINT (23.63977 37.94226)
3,3,POINT (23.53577 37.86141)
4,4,POINT (23.60300 37.95217)


### Write the GeoDataFrames to PostGIS

In [8]:
# SRID to record for frames that carry no CRS of their own (WGS 84 lon/lat).
FALLBACK_EPSG = 4326

# Persist both result frames.
# - to_postgis derives the geometry column's type and SRID from the frame
#   itself, so the SRID has to be supplied through the frame's CRS. A
#   dtype={'geom': ...} entry has no effect here: the geometry column of these
#   frames is named 'geometry', so the key matches no column.
# - set_crs returns a new frame, so the notebook-level frames are not mutated.
# - if_exists='replace' drops and recreates the table, which lets the notebook
#   be re-run from a fresh kernel; the default ('fail') raises as soon as the
#   table already exists.
for table_name, frame in (
    ('vessel_hot_spots', hotspots_gdf),
    ('vessels_per_hot_spot', clustered_points),
):
    if frame.crs is None:
        frame = frame.set_crs(epsg=FALLBACK_EPSG)
    frame.to_postgis(table_name, engine, if_exists='replace')