def _datetime_filter(
    row: DataFrame,
    move_df: DataFrame,
    minimum_distance: timedelta
) -> DataFrame:
    """
    Returns the points of a DataFrame that are temporally close to a point.

    Given a row referencing a point, a DataFrame with multiple points
    and a temporal threshold, it returns all the points of the DataFrame
    whose absolute temporal distance to the reference point is strictly
    smaller than the threshold. The input DataFrame is not modified.

    Parameters
    ----------
    row: Series
        A single point of a trajectory (for instance one row yielded by
        ``iterrows`` or selected with ``iloc``). It must provide the
        id, latitude, longitude and datetime entries.
    move_df: dataframe
        The input trajectory data.
    minimum_distance: datetime.timedelta
        Exclusive upper bound for the temporal distance between the
        reference point and the points that are kept.

    Returns
    -------
    DataFrame
        A copy of the points of move_df (original index preserved) whose
        temporal distance to the reference point is smaller than
        minimum_distance, with an extra ``temporal_distance`` column.
        When at least one point is kept, the columns ``target_id``,
        ``target_lat``, ``target_lon`` and ``target_datetime`` describe
        the reference point; they are absent from an empty result.

    Examples
    --------
    >>> from pymove.query.query import _datetime_filter
    >>> point
    lat                        16.4
    lon                       -54.9
    datetime    2014-10-11 18:00:00
    id                            1
    >>> move_df
        lat   lon            datetime  id
    0  33.1 -77.0 2012-05-19 00:00:00   2
    1  32.8 -77.1 2012-05-19 06:00:00   3
    2  32.5 -77.3 2012-05-19 12:00:00   4
    >>> _datetime_filter(point, move_df, timedelta(hours=21010))
        lat   lon            datetime  id temporal_distance
    2  32.5 -77.3 2012-05-19 12:00:00   4 875 days 06:00:00
       target_id  target_lat  target_lon     target_datetime
    2          1        16.4       -54.9 2014-10-11 18:00:00
    """
    # The distance is kept in a standalone Series so the caller's frame is
    # never written to.
    temporal_distance = (move_df[DATETIME] - row[DATETIME]).abs()
    # abs() already makes the distance non-negative, so a single strict
    # upper-bound comparison is enough.
    within = temporal_distance < minimum_distance

    # Explicit copy: the new columns are written to an independent frame
    # instead of a slice of move_df (no SettingWithCopyWarning).
    filtered = move_df[within].copy()
    # to_numpy() makes the assignment positional, so it does not depend on
    # index alignment (e.g. when the index has repeated labels).
    filtered['temporal_distance'] = temporal_distance[within].to_numpy()

    # Target columns are only added to non-empty results, as before.
    if not filtered.empty:
        filtered['target_id'] = row[TRAJ_ID]
        filtered['target_lat'] = row[LATITUDE]
        filtered['target_lon'] = row[LONGITUDE]
        filtered['target_datetime'] = row[DATETIME]

    return filtered


def _meters_filter(
    row: DataFrame,
    move_df: DataFrame,
    minimum_distance: float
) -> DataFrame:
    """
    Returns the points of a DataFrame that are spatially close to a point.

    Given a row referencing a point, a DataFrame with multiple points
    and a spatial threshold, it returns all the points of the DataFrame
    whose distance (in meters) to the reference point is strictly smaller
    than the threshold. The input DataFrame is not modified.

    Parameters
    ----------
    row: Series
        A single point of a trajectory (for instance one row yielded by
        ``iterrows`` or selected with ``iloc``). It must provide the
        id, latitude, longitude and datetime entries.
    move_df: dataframe
        The input trajectory data.
    minimum_distance: float
        Exclusive upper bound, in meters, for the spatial distance between
        the reference point and the points that are kept.

    Returns
    -------
    DataFrame
        A copy of the points of move_df (original index preserved) whose
        spatial distance to the reference point is smaller than
        minimum_distance, with an extra ``spatial_distance`` column.
        When at least one point is kept, the columns ``target_id``,
        ``target_lat``, ``target_lon`` and ``target_datetime`` describe
        the reference point; they are absent from an empty result.

    Examples
    --------
    >>> from pymove.query.query import _meters_filter
    >>> point
    lat                        16.4
    lon                       -54.9
    datetime    2014-10-11 18:00:00
    id                            1
    >>> move_df
        lat   lon            datetime  id
    0  33.1 -77.0 2012-05-19 00:00:00   2
    1  32.8 -77.1 2012-05-19 06:00:00   3
    2  32.5 -77.3 2012-05-19 12:00:00   4
    >>> _meters_filter(point, move_df, 3190000)
        lat   lon            datetime  id  spatial_distance
    2  32.5 -77.3 2012-05-19 12:00:00   4      3.182834e+06
       target_id  target_lat  target_lon     target_datetime
    2          1        16.4       -54.9 2014-10-11 18:00:00
    """
    # np.asarray normalises whatever array-like the distance helper returns,
    # so the mask and the column assignment below are purely positional.
    spatial_distance = np.asarray(
        distances.euclidean_distance_in_meters(
            lat1=row[LATITUDE],
            lon1=row[LONGITUDE],
            lat2=move_df[LATITUDE],
            lon2=move_df[LONGITUDE],
        )
    )
    within = spatial_distance < minimum_distance

    # Explicit copy: the caller's frame is left untouched and the new columns
    # are written to an independent frame (no SettingWithCopyWarning).
    filtered = move_df[within].copy()
    filtered['spatial_distance'] = spatial_distance[within]

    # Target columns are only added to non-empty results, as before.
    if not filtered.empty:
        filtered['target_id'] = row[TRAJ_ID]
        filtered['target_lat'] = row[LATITUDE]
        filtered['target_lon'] = row[LONGITUDE]
        filtered['target_datetime'] = row[DATETIME]

    return filtered


def query_all_points_by_range(
    traj1: DataFrame,
    move_df: DataFrame,
    minimum_meters: float = 100,
    minimum_time: timedelta = None
) -> DataFrame:
    """
    Queries the points within a spatial range in meters and a temporal range.

    For every point of traj1, selects the points of move_df that are both
    closer than minimum_meters and closer in time than minimum_time.

    Parameters
    ----------
    traj1: dataframe
        The input of a trajectory data.
    move_df: dataframe
        The input of another trajectory data.
    minimum_meters: float, optional
        Exclusive upper bound, in meters, for the spatial distance between
        the points, by default 100
    minimum_time: datetime.timedelta, optional
        Exclusive upper bound for the temporal distance between the points,
        by default timedelta(minutes=2)

    Returns
    -------
    DataFrame
        The points of move_df that satisfy both thresholds for some point
        of traj1, annotated with ``spatial_distance``, ``temporal_distance``
        and the ``target_*`` columns of the matching traj1 point. Matches of
        the last traj1 point come first; the index of move_df is preserved,
        so a label is repeated when a point matches several traj1 points.

    Examples
    --------
    >>> from pymove.query.query import query_all_points_by_range
    >>> traj_df
        lat   lon            datetime  id
    0  16.4 -54.9 2014-10-11 18:00:00   1
    1  16.4 -55.9 2014-10-12 00:00:00   1
    2  16.4 -56.9 2014-10-12 06:00:00   1
    >>> move_df
        lat   lon            datetime  id
    0  33.1 -77.0 2012-05-19 00:00:00   2
    1  32.8 -77.1 2012-05-19 06:00:00   3
    2  32.5 -77.3 2012-05-19 12:00:00   4
    >>> query_all_points_by_range(
    ...     traj_df, move_df,
    ...     minimum_meters=3190000, minimum_time=timedelta(hours=21010)
    ... )
        lat   lon            datetime  id  spatial_distance  target_id
    2  32.5 -77.3 2012-05-19 12:00:00   4      3.182834e+06          1
       target_lat  target_lon     target_datetime temporal_distance
    2        16.4       -54.9 2014-10-11 18:00:00 875 days 06:00:00
    """
    if minimum_time is None:
        minimum_time = timedelta(minutes=2)

    # Per-point results are collected and concatenated once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every iteration.
    matches = []
    for _, row in progress_bar(
        traj1.iterrows(),
        desc='Querying all points by temporal and spatial distance',
        total=traj1.shape[0]
    ):
        coinc_points = _meters_filter(row, move_df, minimum_meters)
        coinc_points = _datetime_filter(row, coinc_points, minimum_time)
        matches.append(coinc_points)

    if not matches:
        return DataFrame([])

    # Empty partial results lack the target_* columns; leaving them out keeps
    # the column order and dtypes of the output stable. If nothing matched at
    # all, the empty frames are concatenated so the columns are still reported.
    non_empty = [found for found in matches if not found.empty]
    # Reversed so the most recently processed point comes first, which is the
    # row order the previous append-based implementation produced.
    return pd.concat((non_empty or matches)[::-1])
ERNESTO', Timedelta('799 days 00:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.6, -44.6, Timestamp('2012-08-10 00:00:00'), + ' HELENE', Timedelta('792 days 18:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.5, -46.3, Timestamp('2012-08-10 06:00:00'), + ' HELENE', Timedelta('792 days 12:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.4, -48.2, Timestamp('2012-08-10 12:00:00'), + ' HELENE', Timedelta('792 days 06:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.4, -50.5, Timestamp('2012-08-10 18:00:00'), + ' HELENE', Timedelta('792 days 00:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.4, -52.9, Timestamp('2012-08-11 00:00:00'), + ' HELENE', Timedelta('791 days 18:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.4, -55.4, Timestamp('2012-08-11 06:00:00'), + ' HELENE', Timedelta('791 days 12:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), + ' HELENE', Timedelta('791 days 06:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + ' HELENE', Timedelta('791 days 00:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', Timedelta('790 days 18:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')]] + +expected__meters_filter = [[11.6, -46.7, Timestamp('2012-08-01 12:00:00'), + ' ERNESTO', 1066161.6471417674, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [12.0, -48.2, Timestamp('2012-08-01 18:00:00'), + ' ERNESTO', 900938.714887712, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [12.4, -49.9, Timestamp('2012-08-02 00:00:00'), + ' ERNESTO', 721970.410639291, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], 
+ [12.7, -51.7, Timestamp('2012-08-02 06:00:00'), + ' ERNESTO', 555014.4613063192, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.0, -53.6, Timestamp('2012-08-02 12:00:00'), + ' ERNESTO', 417258.69534832065, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.2, -55.5, Timestamp('2012-08-02 18:00:00'), + ' ERNESTO', 374504.9191824335, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.4, -57.5, Timestamp('2012-08-03 00:00:00'), + ' ERNESTO', 450805.43534718483, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.6, -59.7, Timestamp('2012-08-03 06:00:00'), + ' ERNESTO', 624231.4229153295, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.7, -61.6, Timestamp('2012-08-03 12:00:00'), + ' ERNESTO', 808188.0577929622, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.8, -63.3, Timestamp('2012-08-03 18:00:00'), + ' ERNESTO', 981971.5317471786, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.6, -44.6, Timestamp('2012-08-10 00:00:00'), + ' HELENE', 1191143.517942208, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.5, -46.3, Timestamp('2012-08-10 06:00:00'), + ' HELENE', 1013996.5382597771, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.4, -48.2, Timestamp('2012-08-10 12:00:00'), + ' HELENE', 822030.1743065921, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.4, -50.5, Timestamp('2012-08-10 18:00:00'), + ' HELENE', 599470.6884598556, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.4, -52.9, Timestamp('2012-08-11 00:00:00'), + ' HELENE', 411124.72610444814, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.4, -55.4, Timestamp('2012-08-11 06:00:00'), + ' HELENE', 350076.3224643749, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), + ' HELENE', 488900.00555183407, ' GONZALO', + 16.4, -54.9, 
Timestamp('2014-10-11 18:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + ' HELENE', 661283.3582634325, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', 797018.0293108114, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')]] + +expected_query_all_points_by_range = [[13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', 513299.9786988872, ' GONZALO', + 16.4, -57.9, Timestamp('2014-10-12 12:00:00'), + Timedelta('791 days 12:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + ' HELENE', 488900.0055518347, ' GONZALO', + 16.4, -56.9, Timestamp('2014-10-12 06:00:00'), + Timedelta('791 days 12:00:00')], + [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', 602173.6463646247, ' GONZALO', + 16.4, -56.9, Timestamp('2014-10-12 06:00:00'), + Timedelta('791 days 06:00:00')], + [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), + ' HELENE', 420788.62897684716, ' GONZALO', + 16.4, -55.9, Timestamp('2014-10-12 00:00:00'), + Timedelta('791 days 12:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + ' HELENE', 570760.3863635769, ' GONZALO', + 16.4, -55.9, Timestamp('2014-10-12 00:00:00'), + Timedelta('791 days 06:00:00')], + [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', 697519.455413829, ' GONZALO', + 16.4, -55.9, Timestamp('2014-10-12 00:00:00'), + Timedelta('791 days 00:00:00')], + [13.4, -55.4, Timestamp('2012-08-11 06:00:00'), + ' HELENE', 350076.3224643749, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), + Timedelta('791 days 12:00:00')], + [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), + ' HELENE', 488900.00555183407, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), + Timedelta('791 days 06:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + ' HELENE', 661283.3582634325, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), + Timedelta('791 days 00:00:00')], + [13.5, -61.4, 
def test__datetime_filter():
    """_datetime_filter keeps the move_df points within 20000 hours of the first point."""
    reference_point = _default_traj_df().iloc[0]
    candidates = _default_move_df()
    column_names = [
        'lat', 'lon', 'datetime', 'id', 'temporal_distance',
        'target_id', 'target_lat', 'target_lon', 'target_datetime',
    ]
    expected = DataFrame(
        data=expected__datetime_filter,
        columns=column_names,
        index=list(range(20, 39)),
    )

    threshold = timedelta(hours=20000)
    result = query._datetime_filter(reference_point, candidates, threshold)

    assert_frame_equal(result, expected)


def test__meters_filter():
    """_meters_filter keeps the move_df points closer than 2000 km to the first point."""
    reference_point = _default_traj_df().iloc[0]
    candidates = _default_move_df()
    column_names = [
        'lat', 'lon', 'datetime', 'id', 'spatial_distance',
        'target_id', 'target_lat', 'target_lon', 'target_datetime',
    ]
    expected = DataFrame(
        data=expected__meters_filter,
        columns=column_names,
        index=list(range(20, 39)),
    )

    result = query._meters_filter(reference_point, candidates, 2000000)

    assert_frame_equal(result, expected)


def test_query_all_points_by_range():
    """query_all_points_by_range combines the spatial and the temporal filters."""
    trajectory = _default_traj_df()
    candidates = _default_move_df()
    column_names = [
        'lat', 'lon', 'datetime', 'id', 'spatial_distance',
        'target_id', 'target_lat', 'target_lon', 'target_datetime',
        'temporal_distance',
    ]
    expected = DataFrame(
        data=expected_query_all_points_by_range,
        columns=column_names,
        index=[38, 37, 38, 36, 37, 38, 35, 36, 37, 38],
    )

    result = query.query_all_points_by_range(
        trajectory,
        candidates,
        minimum_meters=1900000,
        minimum_time=timedelta(hours=19000),
    )

    assert_frame_equal(result, expected)
100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,17 @@ ignore = E203, E266, W402, W503, F401, F841, D401 max-line-length = 90 max-complexity = 15 select = B,C,E,D,N,F,W -exclude = pymove/core/interface.py, pymove/tests/* +exclude = + .git, + __pycache__, + *.egg-info, + .venv/*, + venv/*, + .env/*, + env/*, + *.egg.info, + pymove/core/interface.py, + pymove/tests/* docstring-convention = numpy [mypy]