From ae791be060b4de7970cef77dd414ff23823b6770 Mon Sep 17 00:00:00 2001 From: MauricioCavalcanteOSF Date: Fri, 4 Jun 2021 09:58:44 -0300 Subject: [PATCH 01/10] insert partial query functions in the query module --- pymove/query/query.py | 126 +++++++++++++++++++- pymove/tests/test_query.py | 228 ++++++++++++++++++++++++++++++++++++- 2 files changed, 352 insertions(+), 2 deletions(-) diff --git a/pymove/query/query.py b/pymove/query/query.py index 6dd641f5..1fab0d72 100644 --- a/pymove/query/query.py +++ b/pymove/query/query.py @@ -2,7 +2,8 @@ Query operations. range_query, -knn_query +knn_query, +query_all_points_by_range, """ @@ -16,6 +17,9 @@ from pymove.utils.constants import DATETIME, LATITUDE, LONGITUDE, MEDP, MEDT, TRAJ_ID from pymove.utils.log import logger, progress_bar +from datetime import timedelta +from IPython.display import clear_output + def range_query( traj: DataFrame, @@ -177,3 +181,123 @@ def dist_measure(traj, this, latitude, longitude, datetime): ) return result + + +def _datetime_filter(row, move_df, minimum_distance): + """ + Given a row referencing to a point, a DataFrame with + multiple points and a minimum distance, it returns + all the points of the DataFrame which are in a temporal + distance equal or smaller than the minimum distance + parameter. + + Parameters + ---------- + row: dataframe + The input of one point of a trajectory. + move_df: dataframe + The input trajectory data. + minimum_distance: datetime.timedelta + the minimum temporal distance between the points. + + Returns + ------- + DataFrame + dataframe with all the points of move_df which are in + a temporal distance equal or smaller than the minimum + distance parameter. 
+ """ + datetime = row['datetime']; + move_df['temporal_distance'] = (move_df['datetime'] - datetime).abs(); + filtered = move_df[(move_df['temporal_distance'] < minimum_distance) & (move_df['temporal_distance'] > -minimum_distance)]; + + if (filtered.shape[0] > 0): + filtered['target_id'] = row['id'] + filtered['target_lat'] = row['lat'] + filtered['target_lon'] = row['lon'] + filtered['target_datetime'] = row['datetime'] + + return filtered + + +def _meters_filter(row, move_df, minimum_distance): + """ + Given a row referencing to a point, a DataFrame with + multiple points and a minimum distance, it returns + all the points of the DataFrame which are in a spatial + distance (in meters) equal or smaller than the minimum distance + parameter. + + Parameters + ---------- + row: dataframe + The input of one point of a trajectory. + move_df: dataframe + The input trajectory data. + minimum_distance: float + the minimum spatial distance between the points in meters. + Returns + ------- + DataFrame + dataframe with all the points of move_df which are in + a spatial distance equal or smaller than the minimum + distance parameter. + """ + lat = row['lat'] + lon = row['lon'] + move_df['spatial_distance'] = distances.euclidean_distance_in_meters(lat1=lat, lon1=lon, lat2=move_df['lat'], lon2=move_df['lon']) + filtered = move_df[move_df['spatial_distance'] < minimum_distance] + + if (filtered.shape[0] > 0): + filtered['target_id'] = row['id'] + filtered['target_lat'] = row['lat'] + filtered['target_lon'] = row['lon'] + filtered['target_datetime'] = row['datetime'] + + return filtered + + +def query_all_points_by_range(traj1, move_df, minimum_meters=100, minimum_time=timedelta(minutes=2), datetime_label=DATETIME): + """ + Selects only the points between two Move Dataframes + that have the closest point within a spatial range + based on meters and a temporal range. + + Parameters + ---------- + traj1: dataframe + The input of a trajectory data. 
+ move_df: dataframe + The input of another trajectory data. + minimum_meters: float, optional + the minimum spatial distance, based in meters, between the points, by default 100 + minimum_time: datetime.timedelta, optional + the minimum temporal distance between the points, by default timedelta(minutes=2) + datetime_label: string, optional + the label that refers to the datetime label of the dataframes, by default DATETIME + + Returns + ------- + DataFrame + dataframe with all the points of move_df which are in + a spatial distance and temporal distance equal or smaller + than the minimum distance parameters. + """ + result = pd.DataFrame([]); + total = traj1.shape[0] + count = 0 + for index, row in traj1.iterrows(): + clear_output(wait=True) + print("{} de {}".format(count, total)) + print("{:.2f}%".format((count*100/total))) + coinc_points = _meters_filter(row, move_df, minimum_meters) + coinc_points = _datetime_filter(row, coinc_points, minimum_time) + result = coinc_points.append(result) + + count += 1 + + clear_output(wait=True) + print("{} de {}".format(count, total)) + print("{:.2f}%".format((count*100/total))) + + return result diff --git a/pymove/tests/test_query.py b/pymove/tests/test_query.py index 06de99fb..0659d5a7 100644 --- a/pymove/tests/test_query.py +++ b/pymove/tests/test_query.py @@ -1,10 +1,12 @@ -from pandas import DataFrame, Timestamp +from pandas import DataFrame, Timestamp, Timedelta from pandas.testing import assert_frame_equal from pymove import MoveDataFrame from pymove.query import query from pymove.utils.constants import DATETIME, LATITUDE, LONGITUDE, TRAJ_ID +from datetime import timedelta + traj_example = [[16.4, -54.9, Timestamp('2014-10-11 18:00:00'), ' GONZALO'], [16.4, -55.9, Timestamp('2014-10-12 00:00:00'), @@ -281,6 +283,182 @@ [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), ' HELENE']] +expected__datetime_filter = [[11.6, -46.7, Timestamp('2012-08-01 12:00:00'), + ' ERNESTO', Timedelta('801 days 06:00:00'), + ' GONZALO', 
16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [12.0, -48.2, Timestamp('2012-08-01 18:00:00'), + ' ERNESTO', Timedelta('801 days 00:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [12.4, -49.9, Timestamp('2012-08-02 00:00:00'), + ' ERNESTO', Timedelta('800 days 18:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [12.7, -51.7, Timestamp('2012-08-02 06:00:00'), + ' ERNESTO', Timedelta('800 days 12:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.0, -53.6, Timestamp('2012-08-02 12:00:00'), + ' ERNESTO', Timedelta('800 days 06:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.2, -55.5, Timestamp('2012-08-02 18:00:00'), + ' ERNESTO', Timedelta('800 days 00:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.4, -57.5, Timestamp('2012-08-03 00:00:00'), + ' ERNESTO', Timedelta('799 days 18:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.6, -59.7, Timestamp('2012-08-03 06:00:00'), + ' ERNESTO', Timedelta('799 days 12:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.7, -61.6, Timestamp('2012-08-03 12:00:00'), + ' ERNESTO', Timedelta('799 days 06:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.8, -63.3, Timestamp('2012-08-03 18:00:00'), + ' ERNESTO', Timedelta('799 days 00:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.6, -44.6, Timestamp('2012-08-10 00:00:00'), + ' HELENE', Timedelta('792 days 18:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.5, -46.3, Timestamp('2012-08-10 06:00:00'), + ' HELENE', Timedelta('792 days 12:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.4, -48.2, Timestamp('2012-08-10 12:00:00'), + ' HELENE', Timedelta('792 days 06:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.4, -50.5, 
Timestamp('2012-08-10 18:00:00'), + ' HELENE', Timedelta('792 days 00:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.4, -52.9, Timestamp('2012-08-11 00:00:00'), + ' HELENE', Timedelta('791 days 18:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.4, -55.4, Timestamp('2012-08-11 06:00:00'), + ' HELENE', Timedelta('791 days 12:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), + ' HELENE', Timedelta('791 days 06:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + ' HELENE', Timedelta('791 days 00:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', Timedelta('790 days 18:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')]] + +expected__meters_filter = [[11.6, -46.7, Timestamp('2012-08-01 12:00:00'), + ' ERNESTO', 1066161.6471417674, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [12.0, -48.2, Timestamp('2012-08-01 18:00:00'), + ' ERNESTO', 900938.714887712, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [12.4, -49.9, Timestamp('2012-08-02 00:00:00'), + ' ERNESTO', 721970.410639291, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [12.7, -51.7, Timestamp('2012-08-02 06:00:00'), + ' ERNESTO', 555014.4613063192, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.0, -53.6, Timestamp('2012-08-02 12:00:00'), + ' ERNESTO', 417258.69534832065, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.2, -55.5, Timestamp('2012-08-02 18:00:00'), + ' ERNESTO', 374504.9191824335, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.4, -57.5, Timestamp('2012-08-03 00:00:00'), + ' ERNESTO', 450805.43534718483, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.6, -59.7, 
Timestamp('2012-08-03 06:00:00'), + ' ERNESTO', 624231.4229153295, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.7, -61.6, Timestamp('2012-08-03 12:00:00'), + ' ERNESTO', 808188.0577929622, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.8, -63.3, Timestamp('2012-08-03 18:00:00'), + ' ERNESTO', 981971.5317471786, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.6, -44.6, Timestamp('2012-08-10 00:00:00'), + ' HELENE', 1191143.517942208, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.5, -46.3, Timestamp('2012-08-10 06:00:00'), + ' HELENE', 1013996.5382597771, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.4, -48.2, Timestamp('2012-08-10 12:00:00'), + ' HELENE', 822030.1743065921, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.4, -50.5, Timestamp('2012-08-10 18:00:00'), + ' HELENE', 599470.6884598556, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.4, -52.9, Timestamp('2012-08-11 00:00:00'), + ' HELENE', 411124.72610444814, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.4, -55.4, Timestamp('2012-08-11 06:00:00'), + ' HELENE', 350076.3224643749, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), + ' HELENE', 488900.00555183407, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + ' HELENE', 661283.3582634325, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', 797018.0293108114, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')]] + +expected_query_all_points_by_range = [[13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', 513299.9786988872, ' GONZALO', + 16.4, -57.9, Timestamp('2014-10-12 12:00:00'), + Timedelta('791 days 12:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + ' HELENE', 
488900.0055518347, ' GONZALO', + 16.4, -56.9, Timestamp('2014-10-12 06:00:00'), + Timedelta('791 days 12:00:00')], + [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', 602173.6463646247, ' GONZALO', + 16.4, -56.9, Timestamp('2014-10-12 06:00:00'), + Timedelta('791 days 06:00:00')], + [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), + ' HELENE', 420788.62897684716, ' GONZALO', + 16.4, -55.9, Timestamp('2014-10-12 00:00:00'), + Timedelta('791 days 12:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + ' HELENE', 570760.3863635769, ' GONZALO', + 16.4, -55.9, Timestamp('2014-10-12 00:00:00'), + Timedelta('791 days 06:00:00')], + [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', 697519.455413829, ' GONZALO', + 16.4, -55.9, Timestamp('2014-10-12 00:00:00'), + Timedelta('791 days 00:00:00')], + [13.4, -55.4, Timestamp('2012-08-11 06:00:00'), + ' HELENE', 350076.3224643749, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), + Timedelta('791 days 12:00:00')], + [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), + ' HELENE', 488900.00555183407, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), + Timedelta('791 days 06:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + ' HELENE', 661283.3582634325, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), + Timedelta('791 days 00:00:00')], + [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', 797018.0293108114, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), + Timedelta('790 days 18:00:00')]] + def _default_traj_df(data=None): if data is None: @@ -348,3 +526,51 @@ def test_knn_query(): medt_move_df = query.knn_query(traj_df, move_df, k=2, distance='MEDT') assert_frame_equal(medt_move_df, expected_medt) + +def test__datetime_filter(): + traj_df = _default_traj_df() + firstpoint = traj_df.iloc[0] + move_df = _default_move_df() + expected = DataFrame( + data=expected__datetime_filter, + columns=['lat', 'lon', 'datetime', 'id', 
'temporal_distance', 'target_id', + 'target_lat', 'target_lon', 'target_datetime'], + index=[20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 38] + ) + + result = query._datetime_filter(firstpoint, move_df, timedelta(hours=20000)) + assert_frame_equal(result, expected) + + +def test__meters_filter(): + traj_df = _default_traj_df() + firstpoint = traj_df.iloc[0] + move_df = _default_move_df() + expected = DataFrame( + data=expected__meters_filter, + columns=['lat', 'lon', 'datetime', 'id', 'spatial_distance', 'target_id', + 'target_lat', 'target_lon', 'target_datetime'], + index=[20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 38] + ) + + result = query._meters_filter(firstpoint, move_df, 2000000) + assert_frame_equal(result, expected) + + +def test_query_all_points_by_range(): + traj_df = _default_traj_df(); + move_df = _default_move_df(); + expected = DataFrame( + data=expected_query_all_points_by_range, + columns=['lat', 'lon', 'datetime', 'id', 'spatial_distance', 'target_id', + 'target_lat', 'target_lon', 'target_datetime', 'temporal_distance'], + index=[38, 37, 38, 36, 37, 38, 35, 36, 37, 38] + ) + + result = query.query_all_points_by_range(traj_df, move_df, minimum_meters=1900000, minimum_time=timedelta(hours=19000)) + assert_frame_equal(result, expected) + + + \ No newline at end of file From 0a6ff3e9e12ec509dcc31e6936cd1799908fe35c Mon Sep 17 00:00:00 2001 From: MauricioCavalcanteOSF Date: Fri, 4 Jun 2021 10:14:27 -0300 Subject: [PATCH 02/10] remove mauriciocavalcanteosf code --- pymove/query/query.py | 124 -------------------- pymove/tests/test_query.py | 227 +------------------------------------ 2 files changed, 1 insertion(+), 350 deletions(-) diff --git a/pymove/query/query.py b/pymove/query/query.py index 1fab0d72..dfaf3ce9 100644 --- a/pymove/query/query.py +++ b/pymove/query/query.py @@ -3,7 +3,6 @@ range_query, knn_query, -query_all_points_by_range, """ @@ -17,9 +16,6 @@ from pymove.utils.constants 
import DATETIME, LATITUDE, LONGITUDE, MEDP, MEDT, TRAJ_ID from pymove.utils.log import logger, progress_bar -from datetime import timedelta -from IPython.display import clear_output - def range_query( traj: DataFrame, @@ -181,123 +177,3 @@ def dist_measure(traj, this, latitude, longitude, datetime): ) return result - - -def _datetime_filter(row, move_df, minimum_distance): - """ - Given a row referencing to a point, a DataFrame with - multiple points and a minimum distance, it returns - all the points of the DataFrame which are in a temporal - distance equal or smaller than the minimum distance - parameter. - - Parameters - ---------- - row: dataframe - The input of one point of a trajectory. - move_df: dataframe - The input trajectory data. - minimum_distance: datetime.timedelta - the minimum temporal distance between the points. - - Returns - ------- - DataFrame - dataframe with all the points of move_df which are in - a temporal distance equal or smaller than the minimum - distance parameter. - """ - datetime = row['datetime']; - move_df['temporal_distance'] = (move_df['datetime'] - datetime).abs(); - filtered = move_df[(move_df['temporal_distance'] < minimum_distance) & (move_df['temporal_distance'] > -minimum_distance)]; - - if (filtered.shape[0] > 0): - filtered['target_id'] = row['id'] - filtered['target_lat'] = row['lat'] - filtered['target_lon'] = row['lon'] - filtered['target_datetime'] = row['datetime'] - - return filtered - - -def _meters_filter(row, move_df, minimum_distance): - """ - Given a row referencing to a point, a DataFrame with - multiple points and a minimum distance, it returns - all the points of the DataFrame which are in a spatial - distance (in meters) equal or smaller than the minimum distance - parameter. - - Parameters - ---------- - row: dataframe - The input of one point of a trajectory. - move_df: dataframe - The input trajectory data. - minimum_distance: float - the minimum spatial distance between the points in meters. 
- Returns - ------- - DataFrame - dataframe with all the points of move_df which are in - a spatial distance equal or smaller than the minimum - distance parameter. - """ - lat = row['lat'] - lon = row['lon'] - move_df['spatial_distance'] = distances.euclidean_distance_in_meters(lat1=lat, lon1=lon, lat2=move_df['lat'], lon2=move_df['lon']) - filtered = move_df[move_df['spatial_distance'] < minimum_distance] - - if (filtered.shape[0] > 0): - filtered['target_id'] = row['id'] - filtered['target_lat'] = row['lat'] - filtered['target_lon'] = row['lon'] - filtered['target_datetime'] = row['datetime'] - - return filtered - - -def query_all_points_by_range(traj1, move_df, minimum_meters=100, minimum_time=timedelta(minutes=2), datetime_label=DATETIME): - """ - Selects only the points between two Move Dataframes - that have the closest point within a spatial range - based on meters and a temporal range. - - Parameters - ---------- - traj1: dataframe - The input of a trajectory data. - move_df: dataframe - The input of another trajectory data. - minimum_meters: float, optional - the minimum spatial distance, based in meters, between the points, by default 100 - minimum_time: datetime.timedelta, optional - the minimum temporal distance between the points, by default timedelta(minutes=2) - datetime_label: string, optional - the label that refers to the datetime label of the dataframes, by default DATETIME - - Returns - ------- - DataFrame - dataframe with all the points of move_df which are in - a spatial distance and temporal distance equal or smaller - than the minimum distance parameters. 
- """ - result = pd.DataFrame([]); - total = traj1.shape[0] - count = 0 - for index, row in traj1.iterrows(): - clear_output(wait=True) - print("{} de {}".format(count, total)) - print("{:.2f}%".format((count*100/total))) - coinc_points = _meters_filter(row, move_df, minimum_meters) - coinc_points = _datetime_filter(row, coinc_points, minimum_time) - result = coinc_points.append(result) - - count += 1 - - clear_output(wait=True) - print("{} de {}".format(count, total)) - print("{:.2f}%".format((count*100/total))) - - return result diff --git a/pymove/tests/test_query.py b/pymove/tests/test_query.py index 0659d5a7..b5c28f64 100644 --- a/pymove/tests/test_query.py +++ b/pymove/tests/test_query.py @@ -1,12 +1,10 @@ -from pandas import DataFrame, Timestamp, Timedelta +from pandas import DataFrame, Timestamp from pandas.testing import assert_frame_equal from pymove import MoveDataFrame from pymove.query import query from pymove.utils.constants import DATETIME, LATITUDE, LONGITUDE, TRAJ_ID -from datetime import timedelta - traj_example = [[16.4, -54.9, Timestamp('2014-10-11 18:00:00'), ' GONZALO'], [16.4, -55.9, Timestamp('2014-10-12 00:00:00'), @@ -283,182 +281,6 @@ [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), ' HELENE']] -expected__datetime_filter = [[11.6, -46.7, Timestamp('2012-08-01 12:00:00'), - ' ERNESTO', Timedelta('801 days 06:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [12.0, -48.2, Timestamp('2012-08-01 18:00:00'), - ' ERNESTO', Timedelta('801 days 00:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [12.4, -49.9, Timestamp('2012-08-02 00:00:00'), - ' ERNESTO', Timedelta('800 days 18:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [12.7, -51.7, Timestamp('2012-08-02 06:00:00'), - ' ERNESTO', Timedelta('800 days 12:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.0, -53.6, Timestamp('2012-08-02 12:00:00'), - ' ERNESTO', Timedelta('800 days 
06:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.2, -55.5, Timestamp('2012-08-02 18:00:00'), - ' ERNESTO', Timedelta('800 days 00:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.4, -57.5, Timestamp('2012-08-03 00:00:00'), - ' ERNESTO', Timedelta('799 days 18:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.6, -59.7, Timestamp('2012-08-03 06:00:00'), - ' ERNESTO', Timedelta('799 days 12:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.7, -61.6, Timestamp('2012-08-03 12:00:00'), - ' ERNESTO', Timedelta('799 days 06:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.8, -63.3, Timestamp('2012-08-03 18:00:00'), - ' ERNESTO', Timedelta('799 days 00:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.6, -44.6, Timestamp('2012-08-10 00:00:00'), - ' HELENE', Timedelta('792 days 18:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.5, -46.3, Timestamp('2012-08-10 06:00:00'), - ' HELENE', Timedelta('792 days 12:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.4, -48.2, Timestamp('2012-08-10 12:00:00'), - ' HELENE', Timedelta('792 days 06:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.4, -50.5, Timestamp('2012-08-10 18:00:00'), - ' HELENE', Timedelta('792 days 00:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.4, -52.9, Timestamp('2012-08-11 00:00:00'), - ' HELENE', Timedelta('791 days 18:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.4, -55.4, Timestamp('2012-08-11 06:00:00'), - ' HELENE', Timedelta('791 days 12:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), - ' HELENE', Timedelta('791 days 06:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.3, 
-59.9, Timestamp('2012-08-11 18:00:00'), - ' HELENE', Timedelta('791 days 00:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')], - [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), - ' HELENE', Timedelta('790 days 18:00:00'), - ' GONZALO', 16.4, -54.9, - Timestamp('2014-10-11 18:00:00')]] - -expected__meters_filter = [[11.6, -46.7, Timestamp('2012-08-01 12:00:00'), - ' ERNESTO', 1066161.6471417674, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [12.0, -48.2, Timestamp('2012-08-01 18:00:00'), - ' ERNESTO', 900938.714887712, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [12.4, -49.9, Timestamp('2012-08-02 00:00:00'), - ' ERNESTO', 721970.410639291, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [12.7, -51.7, Timestamp('2012-08-02 06:00:00'), - ' ERNESTO', 555014.4613063192, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.0, -53.6, Timestamp('2012-08-02 12:00:00'), - ' ERNESTO', 417258.69534832065, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.2, -55.5, Timestamp('2012-08-02 18:00:00'), - ' ERNESTO', 374504.9191824335, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.4, -57.5, Timestamp('2012-08-03 00:00:00'), - ' ERNESTO', 450805.43534718483, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.6, -59.7, Timestamp('2012-08-03 06:00:00'), - ' ERNESTO', 624231.4229153295, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.7, -61.6, Timestamp('2012-08-03 12:00:00'), - ' ERNESTO', 808188.0577929622, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.8, -63.3, Timestamp('2012-08-03 18:00:00'), - ' ERNESTO', 981971.5317471786, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.6, -44.6, Timestamp('2012-08-10 00:00:00'), - ' HELENE', 1191143.517942208, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.5, -46.3, Timestamp('2012-08-10 06:00:00'), - ' HELENE', 
1013996.5382597771, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.4, -48.2, Timestamp('2012-08-10 12:00:00'), - ' HELENE', 822030.1743065921, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.4, -50.5, Timestamp('2012-08-10 18:00:00'), - ' HELENE', 599470.6884598556, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.4, -52.9, Timestamp('2012-08-11 00:00:00'), - ' HELENE', 411124.72610444814, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.4, -55.4, Timestamp('2012-08-11 06:00:00'), - ' HELENE', 350076.3224643749, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), - ' HELENE', 488900.00555183407, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), - ' HELENE', 661283.3582634325, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], - [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), - ' HELENE', 797018.0293108114, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00')]] - -expected_query_all_points_by_range = [[13.5, -61.4, Timestamp('2012-08-12 00:00:00'), - ' HELENE', 513299.9786988872, ' GONZALO', - 16.4, -57.9, Timestamp('2014-10-12 12:00:00'), - Timedelta('791 days 12:00:00')], - [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), - ' HELENE', 488900.0055518347, ' GONZALO', - 16.4, -56.9, Timestamp('2014-10-12 06:00:00'), - Timedelta('791 days 12:00:00')], - [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), - ' HELENE', 602173.6463646247, ' GONZALO', - 16.4, -56.9, Timestamp('2014-10-12 06:00:00'), - Timedelta('791 days 06:00:00')], - [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), - ' HELENE', 420788.62897684716, ' GONZALO', - 16.4, -55.9, Timestamp('2014-10-12 00:00:00'), - Timedelta('791 days 12:00:00')], - [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), - ' HELENE', 570760.3863635769, ' GONZALO', - 16.4, -55.9, Timestamp('2014-10-12 00:00:00'), - 
Timedelta('791 days 06:00:00')], - [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), - ' HELENE', 697519.455413829, ' GONZALO', - 16.4, -55.9, Timestamp('2014-10-12 00:00:00'), - Timedelta('791 days 00:00:00')], - [13.4, -55.4, Timestamp('2012-08-11 06:00:00'), - ' HELENE', 350076.3224643749, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), - Timedelta('791 days 12:00:00')], - [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), - ' HELENE', 488900.00555183407, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), - Timedelta('791 days 06:00:00')], - [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), - ' HELENE', 661283.3582634325, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), - Timedelta('791 days 00:00:00')], - [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), - ' HELENE', 797018.0293108114, ' GONZALO', - 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), - Timedelta('790 days 18:00:00')]] - def _default_traj_df(data=None): if data is None: @@ -526,51 +348,4 @@ def test_knn_query(): medt_move_df = query.knn_query(traj_df, move_df, k=2, distance='MEDT') assert_frame_equal(medt_move_df, expected_medt) - -def test__datetime_filter(): - traj_df = _default_traj_df() - firstpoint = traj_df.iloc[0] - move_df = _default_move_df() - expected = DataFrame( - data=expected__datetime_filter, - columns=['lat', 'lon', 'datetime', 'id', 'temporal_distance', 'target_id', - 'target_lat', 'target_lon', 'target_datetime'], - index=[20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, - 37, 38] - ) - - result = query._datetime_filter(firstpoint, move_df, timedelta(hours=20000)) - assert_frame_equal(result, expected) - - -def test__meters_filter(): - traj_df = _default_traj_df() - firstpoint = traj_df.iloc[0] - move_df = _default_move_df() - expected = DataFrame( - data=expected__meters_filter, - columns=['lat', 'lon', 'datetime', 'id', 'spatial_distance', 'target_id', - 'target_lat', 'target_lon', 'target_datetime'], - index=[20, 21, 22, 23, 24, 25, 
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, - 37, 38] - ) - - result = query._meters_filter(firstpoint, move_df, 2000000) - assert_frame_equal(result, expected) - - -def test_query_all_points_by_range(): - traj_df = _default_traj_df(); - move_df = _default_move_df(); - expected = DataFrame( - data=expected_query_all_points_by_range, - columns=['lat', 'lon', 'datetime', 'id', 'spatial_distance', 'target_id', - 'target_lat', 'target_lon', 'target_datetime', 'temporal_distance'], - index=[38, 37, 38, 36, 37, 38, 35, 36, 37, 38] - ) - - result = query.query_all_points_by_range(traj_df, move_df, minimum_meters=1900000, minimum_time=timedelta(hours=19000)) - assert_frame_equal(result, expected) - - \ No newline at end of file From 2aefd3ac35f7b087b583aa9e455a531095a6a2b9 Mon Sep 17 00:00:00 2001 From: MCavalcante16 Date: Fri, 4 Jun 2021 10:18:20 -0300 Subject: [PATCH 03/10] add a space after imports in test_query --- pymove/tests/test_query.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pymove/tests/test_query.py b/pymove/tests/test_query.py index b5c28f64..e8338daa 100644 --- a/pymove/tests/test_query.py +++ b/pymove/tests/test_query.py @@ -5,6 +5,7 @@ from pymove.query import query from pymove.utils.constants import DATETIME, LATITUDE, LONGITUDE, TRAJ_ID + traj_example = [[16.4, -54.9, Timestamp('2014-10-11 18:00:00'), ' GONZALO'], [16.4, -55.9, Timestamp('2014-10-12 00:00:00'), From 74171b9285a3c3a80fb1fa2d2add608e339fe540 Mon Sep 17 00:00:00 2001 From: MCavalcante16 Date: Fri, 4 Jun 2021 10:20:42 -0300 Subject: [PATCH 04/10] insert partial similarity query functions with the respective tests --- pymove/query/query.py | 124 ++++++++++++++++++++ pymove/tests/test_query.py | 226 ++++++++++++++++++++++++++++++++++++- 2 files changed, 349 insertions(+), 1 deletion(-) diff --git a/pymove/query/query.py b/pymove/query/query.py index dfaf3ce9..1fab0d72 100644 --- a/pymove/query/query.py +++ b/pymove/query/query.py @@ -3,6 +3,7 @@ range_query, knn_query, 
+query_all_points_by_range, """ @@ -16,6 +17,9 @@ from pymove.utils.constants import DATETIME, LATITUDE, LONGITUDE, MEDP, MEDT, TRAJ_ID from pymove.utils.log import logger, progress_bar +from datetime import timedelta +from IPython.display import clear_output + def range_query( traj: DataFrame, @@ -177,3 +181,123 @@ def dist_measure(traj, this, latitude, longitude, datetime): ) return result + + +def _datetime_filter(row, move_df, minimum_distance): + """ + Given a row referencing to a point, a DataFrame with + multiple points and a minimum distance, it returns + all the points of the DataFrame which are in a temporal + distance equal or smaller than the minimum distance + parameter. + + Parameters + ---------- + row: dataframe + The input of one point of a trajectory. + move_df: dataframe + The input trajectory data. + minimum_distance: datetime.timedelta + the minimum temporal distance between the points. + + Returns + ------- + DataFrame + dataframe with all the points of move_df which are in + a temporal distance equal or smaller than the minimum + distance parameter. + """ + datetime = row['datetime']; + move_df['temporal_distance'] = (move_df['datetime'] - datetime).abs(); + filtered = move_df[(move_df['temporal_distance'] < minimum_distance) & (move_df['temporal_distance'] > -minimum_distance)]; + + if (filtered.shape[0] > 0): + filtered['target_id'] = row['id'] + filtered['target_lat'] = row['lat'] + filtered['target_lon'] = row['lon'] + filtered['target_datetime'] = row['datetime'] + + return filtered + + +def _meters_filter(row, move_df, minimum_distance): + """ + Given a row referencing to a point, a DataFrame with + multiple points and a minimum distance, it returns + all the points of the DataFrame which are in a spatial + distance (in meters) equal or smaller than the minimum distance + parameter. + + Parameters + ---------- + row: dataframe + The input of one point of a trajectory. + move_df: dataframe + The input trajectory data. 
+ minimum_distance: float + the minimum spatial distance between the points in meters. + Returns + ------- + DataFrame + dataframe with all the points of move_df which are in + a spatial distance equal or smaller than the minimum + distance parameter. + """ + lat = row['lat'] + lon = row['lon'] + move_df['spatial_distance'] = distances.euclidean_distance_in_meters(lat1=lat, lon1=lon, lat2=move_df['lat'], lon2=move_df['lon']) + filtered = move_df[move_df['spatial_distance'] < minimum_distance] + + if (filtered.shape[0] > 0): + filtered['target_id'] = row['id'] + filtered['target_lat'] = row['lat'] + filtered['target_lon'] = row['lon'] + filtered['target_datetime'] = row['datetime'] + + return filtered + + +def query_all_points_by_range(traj1, move_df, minimum_meters=100, minimum_time=timedelta(minutes=2), datetime_label=DATETIME): + """ + Selects only the points between two Move Dataframes + that have the closest point within a spatial range + based on meters and a temporal range. + + Parameters + ---------- + traj1: dataframe + The input of a trajectory data. + move_df: dataframe + The input of another trajectory data. + minimum_meters: float, optional + the minimum spatial distance, based in meters, between the points, by default 100 + minimum_time: datetime.timedelta, optional + the minimum temporal distance between the points, by default timedelta(minutes=2) + datetime_label: string, optional + the label that refers to the datetime label of the dataframes, by default DATETIME + + Returns + ------- + DataFrame + dataframe with all the points of move_df which are in + a spatial distance and temporal distance equal or smaller + than the minimum distance parameters. 
+ """ + result = pd.DataFrame([]); + total = traj1.shape[0] + count = 0 + for index, row in traj1.iterrows(): + clear_output(wait=True) + print("{} de {}".format(count, total)) + print("{:.2f}%".format((count*100/total))) + coinc_points = _meters_filter(row, move_df, minimum_meters) + coinc_points = _datetime_filter(row, coinc_points, minimum_time) + result = coinc_points.append(result) + + count += 1 + + clear_output(wait=True) + print("{} de {}".format(count, total)) + print("{:.2f}%".format((count*100/total))) + + return result diff --git a/pymove/tests/test_query.py b/pymove/tests/test_query.py index e8338daa..0659d5a7 100644 --- a/pymove/tests/test_query.py +++ b/pymove/tests/test_query.py @@ -1,10 +1,11 @@ -from pandas import DataFrame, Timestamp +from pandas import DataFrame, Timestamp, Timedelta from pandas.testing import assert_frame_equal from pymove import MoveDataFrame from pymove.query import query from pymove.utils.constants import DATETIME, LATITUDE, LONGITUDE, TRAJ_ID +from datetime import timedelta traj_example = [[16.4, -54.9, Timestamp('2014-10-11 18:00:00'), ' GONZALO'], @@ -282,6 +283,182 @@ [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), ' HELENE']] +expected__datetime_filter = [[11.6, -46.7, Timestamp('2012-08-01 12:00:00'), + ' ERNESTO', Timedelta('801 days 06:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [12.0, -48.2, Timestamp('2012-08-01 18:00:00'), + ' ERNESTO', Timedelta('801 days 00:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [12.4, -49.9, Timestamp('2012-08-02 00:00:00'), + ' ERNESTO', Timedelta('800 days 18:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [12.7, -51.7, Timestamp('2012-08-02 06:00:00'), + ' ERNESTO', Timedelta('800 days 12:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.0, -53.6, Timestamp('2012-08-02 12:00:00'), + ' ERNESTO', Timedelta('800 days 06:00:00'), + ' GONZALO', 16.4, -54.9, + 
Timestamp('2014-10-11 18:00:00')], + [13.2, -55.5, Timestamp('2012-08-02 18:00:00'), + ' ERNESTO', Timedelta('800 days 00:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.4, -57.5, Timestamp('2012-08-03 00:00:00'), + ' ERNESTO', Timedelta('799 days 18:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.6, -59.7, Timestamp('2012-08-03 06:00:00'), + ' ERNESTO', Timedelta('799 days 12:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.7, -61.6, Timestamp('2012-08-03 12:00:00'), + ' ERNESTO', Timedelta('799 days 06:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.8, -63.3, Timestamp('2012-08-03 18:00:00'), + ' ERNESTO', Timedelta('799 days 00:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.6, -44.6, Timestamp('2012-08-10 00:00:00'), + ' HELENE', Timedelta('792 days 18:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.5, -46.3, Timestamp('2012-08-10 06:00:00'), + ' HELENE', Timedelta('792 days 12:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.4, -48.2, Timestamp('2012-08-10 12:00:00'), + ' HELENE', Timedelta('792 days 06:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.4, -50.5, Timestamp('2012-08-10 18:00:00'), + ' HELENE', Timedelta('792 days 00:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.4, -52.9, Timestamp('2012-08-11 00:00:00'), + ' HELENE', Timedelta('791 days 18:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.4, -55.4, Timestamp('2012-08-11 06:00:00'), + ' HELENE', Timedelta('791 days 12:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), + ' HELENE', Timedelta('791 days 06:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + 
' HELENE', Timedelta('791 days 00:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')], + [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', Timedelta('790 days 18:00:00'), + ' GONZALO', 16.4, -54.9, + Timestamp('2014-10-11 18:00:00')]] + +expected__meters_filter = [[11.6, -46.7, Timestamp('2012-08-01 12:00:00'), + ' ERNESTO', 1066161.6471417674, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [12.0, -48.2, Timestamp('2012-08-01 18:00:00'), + ' ERNESTO', 900938.714887712, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [12.4, -49.9, Timestamp('2012-08-02 00:00:00'), + ' ERNESTO', 721970.410639291, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [12.7, -51.7, Timestamp('2012-08-02 06:00:00'), + ' ERNESTO', 555014.4613063192, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.0, -53.6, Timestamp('2012-08-02 12:00:00'), + ' ERNESTO', 417258.69534832065, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.2, -55.5, Timestamp('2012-08-02 18:00:00'), + ' ERNESTO', 374504.9191824335, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.4, -57.5, Timestamp('2012-08-03 00:00:00'), + ' ERNESTO', 450805.43534718483, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.6, -59.7, Timestamp('2012-08-03 06:00:00'), + ' ERNESTO', 624231.4229153295, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.7, -61.6, Timestamp('2012-08-03 12:00:00'), + ' ERNESTO', 808188.0577929622, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.8, -63.3, Timestamp('2012-08-03 18:00:00'), + ' ERNESTO', 981971.5317471786, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.6, -44.6, Timestamp('2012-08-10 00:00:00'), + ' HELENE', 1191143.517942208, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.5, -46.3, Timestamp('2012-08-10 06:00:00'), + ' HELENE', 1013996.5382597771, ' GONZALO', + 16.4, -54.9, 
Timestamp('2014-10-11 18:00:00')], + [13.4, -48.2, Timestamp('2012-08-10 12:00:00'), + ' HELENE', 822030.1743065921, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.4, -50.5, Timestamp('2012-08-10 18:00:00'), + ' HELENE', 599470.6884598556, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.4, -52.9, Timestamp('2012-08-11 00:00:00'), + ' HELENE', 411124.72610444814, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.4, -55.4, Timestamp('2012-08-11 06:00:00'), + ' HELENE', 350076.3224643749, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), + ' HELENE', 488900.00555183407, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + ' HELENE', 661283.3582634325, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')], + [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', 797018.0293108114, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00')]] + +expected_query_all_points_by_range = [[13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', 513299.9786988872, ' GONZALO', + 16.4, -57.9, Timestamp('2014-10-12 12:00:00'), + Timedelta('791 days 12:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + ' HELENE', 488900.0055518347, ' GONZALO', + 16.4, -56.9, Timestamp('2014-10-12 06:00:00'), + Timedelta('791 days 12:00:00')], + [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', 602173.6463646247, ' GONZALO', + 16.4, -56.9, Timestamp('2014-10-12 06:00:00'), + Timedelta('791 days 06:00:00')], + [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), + ' HELENE', 420788.62897684716, ' GONZALO', + 16.4, -55.9, Timestamp('2014-10-12 00:00:00'), + Timedelta('791 days 12:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + ' HELENE', 570760.3863635769, ' GONZALO', + 16.4, -55.9, Timestamp('2014-10-12 00:00:00'), + Timedelta('791 days 06:00:00')], + [13.5, -61.4, 
Timestamp('2012-08-12 00:00:00'), + ' HELENE', 697519.455413829, ' GONZALO', + 16.4, -55.9, Timestamp('2014-10-12 00:00:00'), + Timedelta('791 days 00:00:00')], + [13.4, -55.4, Timestamp('2012-08-11 06:00:00'), + ' HELENE', 350076.3224643749, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), + Timedelta('791 days 12:00:00')], + [13.3, -57.9, Timestamp('2012-08-11 12:00:00'), + ' HELENE', 488900.00555183407, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), + Timedelta('791 days 06:00:00')], + [13.3, -59.9, Timestamp('2012-08-11 18:00:00'), + ' HELENE', 661283.3582634325, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), + Timedelta('791 days 00:00:00')], + [13.5, -61.4, Timestamp('2012-08-12 00:00:00'), + ' HELENE', 797018.0293108114, ' GONZALO', + 16.4, -54.9, Timestamp('2014-10-11 18:00:00'), + Timedelta('790 days 18:00:00')]] + def _default_traj_df(data=None): if data is None: @@ -349,4 +526,51 @@ def test_knn_query(): medt_move_df = query.knn_query(traj_df, move_df, k=2, distance='MEDT') assert_frame_equal(medt_move_df, expected_medt) + +def test__datetime_filter(): + traj_df = _default_traj_df() + firstpoint = traj_df.iloc[0] + move_df = _default_move_df() + expected = DataFrame( + data=expected__datetime_filter, + columns=['lat', 'lon', 'datetime', 'id', 'temporal_distance', 'target_id', + 'target_lat', 'target_lon', 'target_datetime'], + index=[20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 38] + ) + + result = query._datetime_filter(firstpoint, move_df, timedelta(hours=20000)) + assert_frame_equal(result, expected) + + +def test__meters_filter(): + traj_df = _default_traj_df() + firstpoint = traj_df.iloc[0] + move_df = _default_move_df() + expected = DataFrame( + data=expected__meters_filter, + columns=['lat', 'lon', 'datetime', 'id', 'spatial_distance', 'target_id', + 'target_lat', 'target_lon', 'target_datetime'], + index=[20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 
38] + ) + + result = query._meters_filter(firstpoint, move_df, 2000000) + assert_frame_equal(result, expected) + + +def test_query_all_points_by_range(): + traj_df = _default_traj_df(); + move_df = _default_move_df(); + expected = DataFrame( + data=expected_query_all_points_by_range, + columns=['lat', 'lon', 'datetime', 'id', 'spatial_distance', 'target_id', + 'target_lat', 'target_lon', 'target_datetime', 'temporal_distance'], + index=[38, 37, 38, 36, 37, 38, 35, 36, 37, 38] + ) + + result = query.query_all_points_by_range(traj_df, move_df, minimum_meters=1900000, minimum_time=timedelta(hours=19000)) + assert_frame_equal(result, expected) + + \ No newline at end of file From 5805010b55e615f056024bfc97703a95602aed1e Mon Sep 17 00:00:00 2001 From: flych3r Date: Fri, 4 Jun 2021 10:46:31 -0300 Subject: [PATCH 05/10] fix linting --- pymove/query/query.py | 72 ++++++++++++++++++++++++-------------- pymove/tests/test_query.py | 13 +++---- 2 files changed, 51 insertions(+), 34 deletions(-) diff --git a/pymove/query/query.py b/pymove/query/query.py index 1fab0d72..d574e1c8 100644 --- a/pymove/query/query.py +++ b/pymove/query/query.py @@ -3,23 +3,22 @@ range_query, knn_query, -query_all_points_by_range, +query_all_points_by_range, """ +from datetime import timedelta from typing import Optional, Text import numpy as np import pandas as pd +from IPython.display import clear_output from pandas import DataFrame from pymove.utils import distances from pymove.utils.constants import DATETIME, LATITUDE, LONGITUDE, MEDP, MEDT, TRAJ_ID from pymove.utils.log import logger, progress_bar -from datetime import timedelta -from IPython.display import clear_output - def range_query( traj: DataFrame, @@ -185,10 +184,12 @@ def dist_measure(traj, this, latitude, longitude, datetime): def _datetime_filter(row, move_df, minimum_distance): """ + Returns all the points of the DataFrame which are in a temporal distance. 
+ Given a row referencing to a point, a DataFrame with multiple points and a minimum distance, it returns all the points of the DataFrame which are in a temporal - distance equal or smaller than the minimum distance + distance equal or smaller than the minimum distance parameter. Parameters @@ -207,25 +208,30 @@ def _datetime_filter(row, move_df, minimum_distance): a temporal distance equal or smaller than the minimum distance parameter. """ - datetime = row['datetime']; - move_df['temporal_distance'] = (move_df['datetime'] - datetime).abs(); - filtered = move_df[(move_df['temporal_distance'] < minimum_distance) & (move_df['temporal_distance'] > -minimum_distance)]; - + datetime = row['datetime'] + move_df['temporal_distance'] = (move_df['datetime'] - datetime).abs() + filtered = move_df[ + (move_df['temporal_distance'] < minimum_distance) + & (move_df['temporal_distance'] > -minimum_distance) + ] + if (filtered.shape[0] > 0): filtered['target_id'] = row['id'] filtered['target_lat'] = row['lat'] filtered['target_lon'] = row['lon'] filtered['target_datetime'] = row['datetime'] - + return filtered def _meters_filter(row, move_df, minimum_distance): """ + Returns all the points of the DataFrame which are in a spatial distance. + Given a row referencing to a point, a DataFrame with multiple points and a minimum distance, it returns all the points of the DataFrame which are in a spatial - distance (in meters) equal or smaller than the minimum distance + distance (in meters) equal or smaller than the minimum distance parameter. Parameters @@ -236,8 +242,9 @@ def _meters_filter(row, move_df, minimum_distance): The input trajectory data. minimum_distance: float the minimum spatial distance between the points in meters. 
+ Returns - ------- + ------- DataFrame dataframe with all the points of move_df which are in a spatial distance equal or smaller than the minimum @@ -245,22 +252,32 @@ def _meters_filter(row, move_df, minimum_distance): """ lat = row['lat'] lon = row['lon'] - move_df['spatial_distance'] = distances.euclidean_distance_in_meters(lat1=lat, lon1=lon, lat2=move_df['lat'], lon2=move_df['lon']) + move_df['spatial_distance'] = distances.euclidean_distance_in_meters( + lat1=lat, lon1=lon, lat2=move_df['lat'], lon2=move_df['lon'] + ) filtered = move_df[move_df['spatial_distance'] < minimum_distance] - + if (filtered.shape[0] > 0): filtered['target_id'] = row['id'] filtered['target_lat'] = row['lat'] filtered['target_lon'] = row['lon'] filtered['target_datetime'] = row['datetime'] - + return filtered -def query_all_points_by_range(traj1, move_df, minimum_meters=100, minimum_time=timedelta(minutes=2), datetime_label=DATETIME): +def query_all_points_by_range( + traj1, + move_df, + minimum_meters=100, + minimum_time=None, + datetime_label=DATETIME +): """ - Selects only the points between two Move Dataframes - that have the closest point within a spatial range + Queries closest point within a spatial range based on meters and a temporal range. + + Selects only the points between two Move Dataframes + that have the closest point within a spatial range based on meters and a temporal range. Parameters @@ -283,21 +300,24 @@ def query_all_points_by_range(traj1, move_df, minimum_meters=100, minimum_time=t a spatial distance and temporal distance equal or smaller than the minimum distance parameters. 
""" - result = pd.DataFrame([]); + if minimum_time is None: + minimum_time = timedelta(minutes=2) + + result = pd.DataFrame([]) total = traj1.shape[0] count = 0 - for index, row in traj1.iterrows(): + for _, row in traj1.iterrows(): clear_output(wait=True) - print("{} de {}".format(count, total)) - print("{:.2f}%".format((count*100/total))) + print('{} de {}'.format(count, total)) + print('{:.2f}%'.format((count * 100 / total))) coinc_points = _meters_filter(row, move_df, minimum_meters) coinc_points = _datetime_filter(row, coinc_points, minimum_time) result = coinc_points.append(result) - + count += 1 - + clear_output(wait=True) - print("{} de {}".format(count, total)) - print("{:.2f}%".format((count*100/total))) + print('{} de {}'.format(count, total)) + print('{:.2f}%'.format((count * 100 / total))) return result diff --git a/pymove/tests/test_query.py b/pymove/tests/test_query.py index 0659d5a7..4fe83b73 100644 --- a/pymove/tests/test_query.py +++ b/pymove/tests/test_query.py @@ -1,12 +1,12 @@ -from pandas import DataFrame, Timestamp, Timedelta +from datetime import timedelta + +from pandas import DataFrame, Timedelta, Timestamp from pandas.testing import assert_frame_equal from pymove import MoveDataFrame from pymove.query import query from pymove.utils.constants import DATETIME, LATITUDE, LONGITUDE, TRAJ_ID -from datetime import timedelta - traj_example = [[16.4, -54.9, Timestamp('2014-10-11 18:00:00'), ' GONZALO'], [16.4, -55.9, Timestamp('2014-10-12 00:00:00'), @@ -527,7 +527,7 @@ def test_knn_query(): medt_move_df = query.knn_query(traj_df, move_df, k=2, distance='MEDT') assert_frame_equal(medt_move_df, expected_medt) -def test__datetime_filter(): +def test__datetime_filter(): traj_df = _default_traj_df() firstpoint = traj_df.iloc[0] move_df = _default_move_df() @@ -543,7 +543,7 @@ def test__datetime_filter(): assert_frame_equal(result, expected) -def test__meters_filter(): +def test__meters_filter(): traj_df = _default_traj_df() firstpoint = 
traj_df.iloc[0] move_df = _default_move_df() @@ -571,6 +571,3 @@ def test_query_all_points_by_range(): result = query.query_all_points_by_range(traj_df, move_df, minimum_meters=1900000, minimum_time=timedelta(hours=19000)) assert_frame_equal(result, expected) - - - \ No newline at end of file From 81e7314a5a0f0e3b067fb8c5c822668ce48e103b Mon Sep 17 00:00:00 2001 From: "mcavalcante.b16@gmail.com" Date: Sun, 13 Jun 2021 01:00:39 -0300 Subject: [PATCH 06/10] add type hints --- pymove/query/query.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pymove/query/query.py b/pymove/query/query.py index 1fab0d72..940cbb1a 100644 --- a/pymove/query/query.py +++ b/pymove/query/query.py @@ -183,7 +183,11 @@ def dist_measure(traj, this, latitude, longitude, datetime): return result -def _datetime_filter(row, move_df, minimum_distance): +def _datetime_filter( + row: DataFrame, + move_df: DataFrame, + minimum_distance: TimeDelta +): """ Given a row referencing to a point, a DataFrame with multiple points and a minimum distance, it returns @@ -220,7 +224,10 @@ def _datetime_filter(row, move_df, minimum_distance): return filtered -def _meters_filter(row, move_df, minimum_distance): +def _meters_filter( + row: DataFrame, + move_df: DataFrame, + minimum_distance: float): """ Given a row referencing to a point, a DataFrame with multiple points and a minimum distance, it returns @@ -257,7 +264,12 @@ def _meters_filter(row, move_df, minimum_distance): return filtered -def query_all_points_by_range(traj1, move_df, minimum_meters=100, minimum_time=timedelta(minutes=2), datetime_label=DATETIME): +def query_all_points_by_range( + traj1: DataFrame, + move_df: DataFrame, + minimum_meters: Optional[float] = 100, + minimum_time: Optional[TimeDelta] =timedelta(minutes=2), + datetime_label: Optional[Text] = DATETIME): """ Selects only the points between two Move Dataframes that have the closest point within a spatial range From 
2cc3f3cd9fac4366480141867368c5e1d9b2b1cd Mon Sep 17 00:00:00 2001 From: "mcavalcante.b16@gmail.com" Date: Sun, 13 Jun 2021 02:29:18 -0300 Subject: [PATCH 07/10] add examples --- pymove/query/query.py | 52 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/pymove/query/query.py b/pymove/query/query.py index 8130d271..76cb7c79 100644 --- a/pymove/query/query.py +++ b/pymove/query/query.py @@ -185,7 +185,7 @@ def dist_measure(traj, this, latitude, longitude, datetime): def _datetime_filter( row: DataFrame, move_df: DataFrame, - minimum_distance: TimeDelta + minimum_distance: timedelta ): """ Returns all the points of the DataFrame which are in a temporal distance. @@ -211,6 +211,22 @@ def _datetime_filter( dataframe with all the points of move_df which are in a temporal distance equal or smaller than the minimum distance parameter. + + Examples + -------- + >>> from pymove.query.query import _datetime_filter + >>> point + lat lon datetime id + 0 16.4 -54.9 2014-10-11 18:00:00 1 + >>> move_df + lat lon datetime id + 0 33.1 -77.0 2012-05-19 00:00:00 2 + 1 32.8 -77.1 2012-05-19 06:00:00 3 + 2 32.5 -77.3 2012-05-19 12:00:00 4 + >>> _datetime_filter(point, move_df, timedelta(hours=21010)) + lat lon datetime id temporal_distance target_id target_lat target_lon target_datetime + 0 32.5 -77.3 2012-05-19 12:00:00 4 875 days 06:00:00 1 16.4 -54.9 2014-10-11 18:00:00 + """ datetime = row['datetime'] move_df['temporal_distance'] = (move_df['datetime'] - datetime).abs() @@ -256,6 +272,21 @@ def _meters_filter( dataframe with all the points of move_df which are in a spatial distance equal or smaller than the minimum distance parameter. 
+ + Examples + -------- + >>> from pymove.query.query import _meters_filter + >>> point + lat lon datetime id + 0 16.4 -54.9 2014-10-11 18:00:00 1 + >>> move_df + lat lon datetime id + 0 33.1 -77.0 2012-05-19 00:00:00 2 + 1 32.8 -77.1 2012-05-19 06:00:00 3 + 2 32.5 -77.3 2012-05-19 12:00:00 4 + >>> _meters_filter(point, move_df, 3190000) + lat lon datetime id spatial_distance target_id target_lat target_lon target_datetime + 0 32.5 -77.3 2012-05-19 12:00:00 4 3.182834e+06 1 16.4 -54.9 2014-10-11 18:00:00 """ lat = row['lat'] lon = row['lon'] @@ -277,7 +308,7 @@ def query_all_points_by_range( traj1: DataFrame, move_df: DataFrame, minimum_meters: Optional[float] = 100, - minimum_time: Optional[TimeDelta] =timedelta(minutes=2), + minimum_time: Optional[timedelta] =timedelta(minutes=2), datetime_label: Optional[Text] = DATETIME): """ Queries closest point within a spatial range based on meters and a temporal range. @@ -305,6 +336,23 @@ def query_all_points_by_range( dataframe with all the points of move_df which are in a spatial distance and temporal distance equal or smaller than the minimum distance parameters. 
+ + Examples + -------- + >>> from pymove.query.query import query_all_points_by_range + >>> traj_df + lat lon datetime id + 0 16.4 -54.9 2014-10-11 18:00:00 1 + 1 16.4 -55.9 2014-10-12 00:00:00 1 + 2 16.4 -56.9 2014-10-12 06:00:00 1 + >>> move_df + lat lon datetime id + 0 33.1 -77.0 2012-05-19 00:00:00 2 + 1 32.8 -77.1 2012-05-19 06:00:00 3 + 2 32.5 -77.3 2012-05-19 12:00:00 4 + >>> query_all_points_by_range(traj_df, move_df, minimum_meters=3190000, minimum_time=timedelta(hours=21010)) + lat lon datetime id spatial_distance target_id target_lat target_lon target_datetime temporal_distance + 0 32.5 -77.3 2012-05-19 12:00:00 4 3.182834e+06 1 16.4 -54.9 2014-10-11 18:00:00 875 days 06:00:00 """ if minimum_time is None: minimum_time = timedelta(minutes=2) From 63a986203492ddba579d7650276ce3f5581a025b Mon Sep 17 00:00:00 2001 From: "mcavalcante.b16@gmail.com" Date: Sun, 13 Jun 2021 02:39:39 -0300 Subject: [PATCH 08/10] change simple print to progress_bar log --- pymove/query/query.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pymove/query/query.py b/pymove/query/query.py index 76cb7c79..732ed110 100644 --- a/pymove/query/query.py +++ b/pymove/query/query.py @@ -309,7 +309,8 @@ def query_all_points_by_range( move_df: DataFrame, minimum_meters: Optional[float] = 100, minimum_time: Optional[timedelta] =timedelta(minutes=2), - datetime_label: Optional[Text] = DATETIME): + datetime_label: Optional[Text] = DATETIME +): """ Queries closest point within a spatial range based on meters and a temporal range. 
@@ -360,18 +361,13 @@ def query_all_points_by_range( result = pd.DataFrame([]) total = traj1.shape[0] count = 0 - for _, row in traj1.iterrows(): - clear_output(wait=True) - print('{} de {}'.format(count, total)) - print('{:.2f}%'.format((count * 100 / total))) + for _, row in progress_bar( + traj1.iterrows(), desc="Querying all points by temporal and spatial distance", total=traj1.shape[0] + ): coinc_points = _meters_filter(row, move_df, minimum_meters) coinc_points = _datetime_filter(row, coinc_points, minimum_time) result = coinc_points.append(result) count += 1 - clear_output(wait=True) - print('{} de {}'.format(count, total)) - print('{:.2f}%'.format((count * 100 / total))) - return result From 59c546892e8a73f3d35a552f9a1dd3e40e29b52a Mon Sep 17 00:00:00 2001 From: "mcavalcante.b16@gmail.com" Date: Sun, 13 Jun 2021 02:41:04 -0300 Subject: [PATCH 09/10] remove clear_output import --- pymove/query/query.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pymove/query/query.py b/pymove/query/query.py index 732ed110..11cae096 100644 --- a/pymove/query/query.py +++ b/pymove/query/query.py @@ -12,7 +12,6 @@ import numpy as np import pandas as pd -from IPython.display import clear_output from pandas import DataFrame from pymove.utils import distances From 96c2391acdd8997f0daa522e4caa16bfff0f3a17 Mon Sep 17 00:00:00 2001 From: "mcavalcante.b16@gmail.com" Date: Sun, 27 Jun 2021 00:21:26 -0300 Subject: [PATCH 10/10] add examples in range_query and knn_query --- pymove/query/query.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/pymove/query/query.py b/pymove/query/query.py index a536548a..67f474f4 100644 --- a/pymove/query/query.py +++ b/pymove/query/query.py @@ -65,6 +65,25 @@ def range_query( ------ ValueError: if distance measure is invalid + Examples + -------- + >>> from pymove.query.query import range_query + >>> traj_df + lat lon datetime id + 0 16.4 -54.9 2014-10-11 18:00:00 1 + 1 16.4 -55.9 2014-10-12 00:00:00 1 + 2 
16.4 -56.9 2014-10-12 06:00:00 1 + >>> move_df + lat lon datetime id + 0 33.1 -77.0 2012-05-19 00:00:00 2 + 1 32.8 -77.1 2012-05-19 06:00:00 3 + 2 32.5 -77.3 2012-05-19 12:00:00 4 + >>> range_query( + ... traj_df, move_df, min_dist=80.5 + ... ) + lat lon datetime id + 1 32.8 -77.1 2012-05-19 06:00:00 3 + 2 32.5 -77.3 2012-05-19 12:00:00 4 """ result = traj.copy() result.drop(result.index, inplace=True) @@ -139,6 +158,27 @@ def knn_query( ------ ValueError: if distance measure is invalid + Examples + -------- + >>> from pymove.query.query import knn_query + >>> traj_df + lat lon datetime id + 0 16.4 -54.9 2014-10-11 18:00:00 1 + 1 16.4 -55.9 2014-10-12 00:00:00 1 + 2 16.4 -56.9 2014-10-12 06:00:00 1 + >>> move_df + lat lon datetime id + 0 33.1 -77.0 2012-05-19 00:00:00 2 + 1 32.8 -77.1 2012-05-19 06:00:00 3 + 2 32.5 -77.3 2012-05-19 12:00:00 4 + >>> knn_query( + ... traj_df, move_df, k=1 + ... ) + lat lon datetime id + 0 16.4 -54.9 2014-10-11 18:00:00 1 + 1 16.4 -55.9 2014-10-12 00:00:00 1 + 2 16.4 -56.9 2014-10-12 06:00:00 1 + 2 32.5 -77.3 2012-05-19 12:00:00 4 """ k_list = pd.DataFrame([[np.Inf, 'empty']] * k, columns=['distance', TRAJ_ID])