In [270]:
import psycopg2
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sqlalchemy import create_engine
from shapely.geometry import Point, LineString, Polygon
from shapely.ops import transform
from functools import partial
import pyproj

from config import DB_CONFIG

In [271]:
# Open the PostgreSQL connection using the keyword arguments stored in DB_CONFIG.
conn = psycopg2.connect(**DB_CONFIG)

# psycopg2 reports `closed == 0` for as long as the connection is open.
if not conn.closed:
    print("Connected to DB")

# Query used to load rows of the segmented_trajectories table (capped at 1000 rows).
all_points_sql = 'SELECT * FROM segmented_trajectories ORDER BY mmsi ASC,points DESC LIMIT 1000'

# Read the query result into a GeoDataFrame, taking geometries from the 'geometry' column.
seg_traj = gpd.GeoDataFrame.from_postgis(
    sql=all_points_sql,
    con=conn,
    geom_col='geometry',
)

Connected to DB


In [272]:
# Declare the loaded coordinates as EPSG:4326, then reproject to EPSG:2100
# (GGRS87 / Greek Grid) so that distances are expressed in metres.
seg_traj = (
    seg_traj
    .set_crs(epsg=4326)
    .to_crs(epsg=2100)
)
seg_traj.crs

<Projected CRS: EPSG:2100>
Name: GGRS87 / Greek Grid
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Greece - onshore.
- bounds: (19.57, 34.88, 28.3, 41.75)
Coordinate Operation:
- name: Greek Grid
- method: Transverse Mercator
Datum: Greek Geodetic Reference System 1987
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [273]:
# Preview the first six reprojected trajectories.
seg_traj.head(n=6)

Unnamed: 0,mmsi,starting_on,ending_on,points,geometry
0,642167061,2018-01-01 00:54:38,2018-01-15 20:35:04,39,"LINESTRING (466073.445 4185050.944, 469708.471..."
1,642167061,2018-01-01 00:24:18,2018-01-01 00:50:17,14,"LINESTRING (454353.403 4187245.499, 454481.342..."
2,667005041,2018-01-09 06:21:35,2018-01-09 07:23:45,250,"LINESTRING (469995.582 4187912.741, 469931.280..."
3,667005041,2018-01-09 07:24:55,2018-01-09 07:50:55,131,"LINESTRING (465885.928 4194791.864, 465886.377..."
4,667005041,2018-01-09 08:14:25,2018-01-09 08:35:55,102,"LINESTRING (464919.591 4198642.137, 464920.235..."
5,667005041,2018-01-09 07:52:34,2018-01-09 07:59:04,34,"LINESTRING (464821.318 4195352.590, 464822.074..."


In [274]:
def resampling(trajection,radius=50,max_outliers=5, max_group=5):
    """Replace groups of nearby trajectory vertices with a single centroid point.

    For every vertex of the trajectory a circle of ``radius`` is built around
    it and the vertices that follow are scanned in order:

    * a vertex inside the circle joins the group of the current centre;
    * a vertex outside the circle ends the scan when the group already holds
      at least ``max_group`` points or when ``max_outliers`` outside vertices
      have already been skipped; otherwise it is skipped and counted.

    Parameters
    ----------
    trajection : mapping or pandas.Series
        Object whose ``'geometry'`` entry exposes ``.coords``. In this
        notebook it is a row of ``seg_traj`` (already in EPSG:2100).
    radius : float, default 50
        Buffer distance around each centre vertex, in the units of the
        geometry's coordinates (metres for EPSG:2100).
    max_outliers : int, default 5
        Number of outside vertices the scan tolerates before the next outside
        vertex ends it. The counter is cumulative, not reset by inside vertices.
    max_group : int, default 5
        Group size from which the first outside vertex ends the scan. It is
        only checked on outside vertices, so a group may grow beyond it.

    Returns
    -------
    list of shapely.geometry.Point
        One point per emitted group: the vertex itself for single-point
        groups, otherwise the centroid of the LineString through the group.

    Notes
    -----
    NOTE(review): a group is only emitted when its scan ends on an outside
    vertex. A scan that reaches the end of the trajectory first discards its
    group, so trailing vertices (other than the very last one, which is always
    emitted on its own) can be dropped. Vertices are also not consumed by a
    group, so neighbouring centres may share members. Both behaviours are
    kept unchanged here because the notebook's point counts depend on them;
    confirm whether they are intended.
    """
    # Coordinates are used as-is: the geometry is already in EPSG:2100, so no
    # reprojection is needed. Each vertex is wrapped in a Point exactly once.
    points = [Point(xy) for xy in trajection['geometry'].coords]
    n_points = len(points)

    # Find points that lie within `radius` of each centre point.
    to_resample = []
    for i, center in enumerate(points):
        radius_circle = center.buffer(radius)
        not_included = 0
        resample_candidates = [center]

        for j in range(i + 1, n_points):
            candidate = points[j]
            if radius_circle.contains(candidate):
                resample_candidates.append(candidate)
            elif len(resample_candidates) >= max_group or not_included >= max_outliers:
                # Scan ends on this outside vertex: emit the group built so far.
                to_resample.append(resample_candidates)
                break
            else:
                not_included += 1

        # The last vertex has nothing after it to scan; always emit it alone.
        if i == n_points - 1:
            to_resample.append([center])

    # Collapse every multi-point group to its centroid; keep lone points as they are.
    resampled = [
        group[0] if len(group) == 1 else LineString(group).centroid
        for group in to_resample
    ]

    return resampled

In [275]:
# Try resampling() on a single trajectory: the row at position 2 (the third row) of seg_traj.
print('Original trajection length: {} points.'.format(seg_traj.iloc[2]['points']))
resampled = resampling(seg_traj.iloc[2])
print('Resampled trajection length: {} points.'.format(len(resampled)))


Original trajection length: 250 points.
Resampled trajection length: 238 points.


In [276]:
# Besides the trajectory, resampling() takes three tuning arguments:
# radius, max_outliers and max_group.
#
# radius       - size of the search area around a point, in CRS units (metres for EPSG:2100).
# max_outliers - how many points outside the radius the search may skip before it stops.
#                With max_outliers=5 the search ends on the first outside point found after
#                5 outside points have already been skipped (they need not be consecutive).
# max_group    - once a group holds at least this many points, the first point outside the
#                radius ends the search.
#
# In general, the higher max_outliers and radius are, the fewer points the resampled
# trajectory has, and thus the lower its accuracy.

# Number of leading rows to resample; set to None to process every trajectory.
MAX_TRAJECTORIES = 6

seg_traj['resampled_points'] = np.nan
# Object dtype from the start, so geometries are not forced into a float (NaN) column.
seg_traj['resampled_traj'] = None

original_points = 0
resampled_points = 0
for index, traj in seg_traj.iloc[:MAX_TRAJECTORIES].iterrows():
    resampled_geom = resampling(traj)
    original_points += traj['points']
    resampled_points += len(resampled_geom)

    if len(resampled_geom) > 1:
        geom = LineString(resampled_geom)
    elif resampled_geom:
        # A single resampled point is already a shapely Point; store it directly.
        geom = resampled_geom[0]
    else:
        # Nothing came back (e.g. an empty input geometry): leave the cell empty.
        geom = None

    # .at writes one scalar cell, so the geometry is never treated as a sequence.
    seg_traj.at[index, 'resampled_points'] = len(resampled_geom)
    seg_traj.at[index, 'resampled_traj'] = geom

    print('Resampling for trajectory {} completed successfully.\nPoints Before {} \nPoints After {}.\n'.format(index,traj['points'],len(resampled_geom)))

print('\nOriginal number of points: {}.'.format(original_points))
print('Number of points after resampling: {}.'.format(resampled_points))

Resampling for trajectory 0 completed successfully.
Points Before 39 
Points After 34.

Resampling for trajectory 1 completed successfully.
Points Before 14 
Points After 9.

Resampling for trajectory 2 completed successfully.
Points Before 250 
Points After 238.

Resampling for trajectory 3 completed successfully.
Points Before 131 
Points After 125.

Resampling for trajectory 4 completed successfully.
Points Before 102 
Points After 95.


Original number of points: 570.
Number of points after resampling: 526.


In [277]:
# Preview the first six rows, now including the resampled point count and geometry.
seg_traj.head(n=6)

Unnamed: 0,mmsi,starting_on,ending_on,points,geometry,resampled_points,resampled_traj
0,642167061,2018-01-01 00:54:38,2018-01-15 20:35:04,39,"LINESTRING (466073.445 4185050.944, 469708.471...",34.0,LINESTRING (466073.4447602328 4185050.94364822...
1,642167061,2018-01-01 00:24:18,2018-01-01 00:50:17,14,"LINESTRING (454353.403 4187245.499, 454481.342...",9.0,LINESTRING (454353.4032105839 4187245.49915016...
2,667005041,2018-01-09 06:21:35,2018-01-09 07:23:45,250,"LINESTRING (469995.582 4187912.741, 469931.280...",238.0,LINESTRING (469995.582160474 4187912.741008276...
3,667005041,2018-01-09 07:24:55,2018-01-09 07:50:55,131,"LINESTRING (465885.928 4194791.864, 465886.377...",125.0,LINESTRING (465882.8301373423 4194814.28738262...
4,667005041,2018-01-09 08:14:25,2018-01-09 08:35:55,102,"LINESTRING (464919.591 4198642.137, 464920.235...",95.0,LINESTRING (464919.5914276484 4198642.13668508...
5,667005041,2018-01-09 07:52:34,2018-01-09 07:59:04,34,"LINESTRING (464821.318 4195352.590, 464822.074...",25.0,LINESTRING (464817.6210244313 4195375.79711831...
