From 901b1abda9d5a893141c4e61b3ebad774637adb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor?= Date: Tue, 25 May 2021 17:47:03 -0300 Subject: [PATCH 01/56] putting examples on math module --- pymove/utils/math.py | 65 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 6 deletions(-) diff --git a/pymove/utils/math.py b/pymove/utils/math.py index bab189c7..e43fce05 100644 --- a/pymove/utils/math.py +++ b/pymove/utils/math.py @@ -28,6 +28,19 @@ def is_number(value: Union[int, float, str]): ------- boolean True if numerical, otherwise False + + Examples + -------- + >>> from pymove.utils.math import is_number + >>> a,b,c,d = 50,22.5,'11.25','house' + >>> print(is_number(a), type(is_number(a))) + True + >>> print(is_number(b), type(is_number(b))) + True - + >>> print(is_number(c), type(is_number(c))) + True + >>> print(is_number(d), type(is_number(d))) + False """ try: float(value) @@ -55,6 +68,12 @@ def std(values_array: List[float]) -> float: squaring with * is over 3 times as fast as with **2 http://stackoverflow.com/questions/29046346/comparison-of-power-to-multiplication-in-python + Example + ------- + >>> from pymove.utils.math import std + >>> list = [7.8,9.7,6.4,5.6, 10] + >>> print(std(list), type(std(list))) + 1.7435595774162693 """ size = len(values_array) mean = sum(values_array) / size @@ -79,6 +98,12 @@ def avg_std(values_array: List[float]) -> Tuple[float, float]: float Represents the value of standard deviation. + Example + ------- + >>> from pymove.utils.math import avg_std + >>> list = [7.8,9.7,6.4,5.6, 10] + >>> print(avg_std(list), type(avg_std(list))) + 1.9493588689617927 """ avg = sum(values_array) / len(values_array) return avg, std(values_array) @@ -98,6 +123,12 @@ def std_sample(values_array: List[float]) -> float: float Represents the value of standard deviation of sample. 
+ Example + ------- + >>> from pymove.utils.math import std_sample + >>> list = [7.8,9.7,6.4,5.6, 10] + >>> print(std_sample(list), type(std_sample(list))) + 1.9493588689617927 """ size = len(values_array) return std(values_array) * math.sqrt(size / (size - 1)) @@ -119,6 +150,12 @@ def avg_std_sample(values_array: List[float]) -> Tuple[float, float]: float Represents the standard deviation of sample. + Example + ------- + >>> from pymove.utils.math import avg_std_sample + >>> list = [7.8,9.7,6.4,5.6, 10] + >>> print(avg_std_sample(list), type(avg_std_sample(list))) + (7.9, 1.9493588689617927) """ avg = sum(values_array) / len(values_array) return avg, std_sample(values_array) @@ -143,6 +180,16 @@ def arrays_avg( float The mean of the array elements. + Examples + -------- + >>> from pymove.utils.math import arrays_avg + >>> list = [7.8,9.7,6.4,5.6, 10] + >>> weights = [0.1,0.3,0.15,0.15,0.3] + >>> print('standard average', arrays_avg(list), type(arrays_avg(list))) + standard average 7.9 + >>> print('weighted average: ', arrays_avg(list, weights), + type(arrays_avg(list, weights))) + weighted average: 1.6979999999999997 """ n = len(values_array) @@ -181,7 +228,12 @@ def array_stats(values_array: List[float]) -> Tuple[float, float, int]: The sum of the square value of each element in the array. int. The number of elements in the array. - + Example + ------- + >>> from pymove.utils.math import array_stats + >>> list = [7.8,9.7,6.4,5.6, 10] + >>> print(array_stats(list), type(array_stats(list))) + (39.5, 327.25, 5) """ sum_ = 0 sum_sq = 0 @@ -215,10 +267,11 @@ def interpolation(x0: float, y0: float, x1: float, y1: float, x: float) -> float float. Is the interpolated or extrapolated value. 
- Examples - -------- - - interpolation 1: (30, 3, 40, 5, 37) -> 4.4 - - interpolation 2: (30, 3, 40, 5, 35) -> 4.0 - + Example + ------- + >>> from pymove.utils.math import interpolation + >>> x0,y0,x1,y1,x = 2,4,3,6,3.5 + >>> print(interpolation(x0,y0,x1,y1,x), type(interpolation(x0,y0,x1,y1,x))) + 7.0 """ return y0 + (y1 - y0) * ((x - x0) / (x1 - x0)) From 9c41ca05c14da7746ec942216033d1f07cd1f6cb Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 25 May 2021 18:12:21 -0300 Subject: [PATCH 02/56] spacing on examples --- pymove/utils/math.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/pymove/utils/math.py b/pymove/utils/math.py index e43fce05..0fb4019b 100644 --- a/pymove/utils/math.py +++ b/pymove/utils/math.py @@ -32,11 +32,11 @@ def is_number(value: Union[int, float, str]): Examples -------- >>> from pymove.utils.math import is_number - >>> a,b,c,d = 50,22.5,'11.25','house' + >>> a, b, c, d = 50, 22.5, '11.25', 'house' >>> print(is_number(a), type(is_number(a))) True >>> print(is_number(b), type(is_number(b))) - True - + True >>> print(is_number(c), type(is_number(c))) True >>> print(is_number(d), type(is_number(d))) @@ -71,7 +71,7 @@ def std(values_array: List[float]) -> float: Example ------- >>> from pymove.utils.math import std - >>> list = [7.8,9.7,6.4,5.6, 10] + >>> list = [7.8, 9.7, 6.4, 5.6, 10] >>> print(std(list), type(std(list))) 1.7435595774162693 """ @@ -101,7 +101,7 @@ def avg_std(values_array: List[float]) -> Tuple[float, float]: Example ------- >>> from pymove.utils.math import avg_std - >>> list = [7.8,9.7,6.4,5.6, 10] + >>> list = [7.8, 9.7, 6.4, 5.6, 10] >>> print(avg_std(list), type(avg_std(list))) 1.9493588689617927 """ @@ -126,7 +126,7 @@ def std_sample(values_array: List[float]) -> float: Example ------- >>> from pymove.utils.math import std_sample - >>> list = [7.8,9.7,6.4,5.6, 10] + >>> list = [7.8, 9.7, 6.4, 5.6, 10] >>> print(std_sample(list), type(std_sample(list))) 
1.9493588689617927 """ @@ -153,7 +153,7 @@ def avg_std_sample(values_array: List[float]) -> Tuple[float, float]: Example ------- >>> from pymove.utils.math import avg_std_sample - >>> list = [7.8,9.7,6.4,5.6, 10] + >>> list = [7.8, 9.7, 6.4, 5.6, 10] >>> print(avg_std_sample(list), type(avg_std_sample(list))) (7.9, 1.9493588689617927) """ @@ -183,13 +183,16 @@ def arrays_avg( Examples -------- >>> from pymove.utils.math import arrays_avg - >>> list = [7.8,9.7,6.4,5.6, 10] - >>> weights = [0.1,0.3,0.15,0.15,0.3] + >>> list = [7.8, 9.7, 6.4, 5.6, 10] + >>> weights = [0.1, 0.3, 0.15, 0.15, 0.3] >>> print('standard average', arrays_avg(list), type(arrays_avg(list))) - standard average 7.9 - >>> print('weighted average: ', arrays_avg(list, weights), - type(arrays_avg(list, weights))) - weighted average: 1.6979999999999997 + 'standard average 7.9 ' + >>> print( + >>> 'weighted average: ', + >>> arrays_avg(list, weights), + >>> type(arrays_avg(list, weights)) + >>> ) + 'weighted average: 1.6979999999999997 ' """ n = len(values_array) @@ -231,7 +234,7 @@ def array_stats(values_array: List[float]) -> Tuple[float, float, int]: Example ------- >>> from pymove.utils.math import array_stats - >>> list = [7.8,9.7,6.4,5.6, 10] + >>> list = [7.8, 9.7, 6.4, 5.6, 10] >>> print(array_stats(list), type(array_stats(list))) (39.5, 327.25, 5) """ @@ -270,7 +273,7 @@ def interpolation(x0: float, y0: float, x1: float, y1: float, x: float) -> float Example ------- >>> from pymove.utils.math import interpolation - >>> x0,y0,x1,y1,x = 2,4,3,6,3.5 + >>> x0, y0, x1, y1, x = 2, 4, 3, 6, 3.5 >>> print(interpolation(x0,y0,x1,y1,x), type(interpolation(x0,y0,x1,y1,x))) 7.0 """ From 64e4acc599d071c590a6e327dd5740ad48a7200f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor?= Date: Wed, 26 May 2021 23:52:29 -0300 Subject: [PATCH 03/56] putting examples on mem module --- pymove/utils/mem.py | 83 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 3 deletions(-) diff 
--git a/pymove/utils/mem.py b/pymove/utils/mem.py index f98ece71..edeaf1ff 100644 --- a/pymove/utils/mem.py +++ b/pymove/utils/mem.py @@ -38,6 +38,24 @@ def reduce_mem_usage_automatic(df: DataFrame): df : dataframe The input data to which the operation will be performed. + Examples + -------- + >>> from pymove.utils.mem import reduce_mem_usage_automatic + >>> df + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + 5 39.984710 116.319865 2008-10-23 05:53:23 1 + 6 39.984674 116.319810 2008-10-23 05:53:28 1 + 7 39.984623 116.319773 2008-10-23 05:53:33 1 + 8 39.984606 116.319732 2008-10-23 05:53:38 1 + >>> reduce_mem_usage_automatic(df) + Memory usage of dataframe is 0.00 MB + Memory usage after optimization is: 0.00 MB + Decreased by 26.0 % """ start_mem = df.memory_usage().sum() / 1024 ** 2 logger.info('Memory usage of dataframe is {:.2f} MB'.format(start_mem)) @@ -138,6 +156,23 @@ def total_size( float The memory used by the given object + Examples + -------- + >>> from pymove.utils.mem import total_size + >>> df + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + 5 39.984710 116.319865 2008-10-23 05:53:23 1 + 6 39.984674 116.319810 2008-10-23 05:53:28 1 + 7 39.984623 116.319773 2008-10-23 05:53:33 1 + 8 39.984606 116.319732 2008-10-23 05:53:38 1 + >>> total_size(df) + Size in bytes: 432, Type: + 432 """ if handlers is None: handlers = {} @@ -194,7 +229,19 @@ def begin_operation(name: Text) -> Dict: ------- dict dictionary with the operation stats - + Examples + -------- + >>> from pymove.utils.mem import begin_operation + >>> 
print(begin_operation('move_data'), type(begin_operation('move_data'))) + {'process': psutil.Process(pid=103401, name='python', + status='running', started='21:48:11'), + 'init': 293732352, 'start': 1622082973.8825781, 'name': 'move_data'} + + >>> print(begin_operation('mdf'), type(begin_operation('mdf'))) + {'process': psutil.Process(pid=103401, name='python', + status='running', started='21:48:11'), + 'init': 293732352, 'start': 1622082973.8850513, 'name': 'mdf'} + """ process = psutil.Process(os.getpid()) init = process.memory_info()[0] @@ -216,6 +263,17 @@ def end_operation(operation: Dict) -> Dict: dict dictionary with the operation execution stats + Examples + -------- + >>> from pymove.utils.mem import end_operation + >>> stats = {'process': psutil.Process(pid=103401, name='python', + status='running', started='21:48:11'), + 'init': 293732352, + 'start': 1622083075.4811873, + 'name': 'move_data'} + >>> print(end_operation(stats), type(end_operation(stats))) + {'name': 'move_data', 'time in seconds': 0.0014350414276123047, + 'memory': '0.0 B'} """ finish = operation['process'].memory_info()[0] last_operation_name = operation['name'] @@ -244,7 +302,11 @@ def sizeof_fmt(mem_usage: int, suffix: Optional[Text] = 'B') -> Text: ------- str A string of the memory usage in a more readable format - + Examples + -------- + >>> from pymove.utils.mem import sizeof_fmt + >>> print(sizeof_fmt(6.64,'MB'), type(sizeof_fmt(6.64,'MB'))) + 6.6 MB """ for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']: if abs(mem_usage) < 1024.0: @@ -272,7 +334,22 @@ def top_mem_vars( ------- DataFrame dataframe with variables names and sizes - + Examples + -------- + >>> from pymove.utils.mem import top_mem_vars + >>> print(top_mem_vars(globals()), type(top_mem_vars(globals()))) + var mem + 0 Out 1.1 KiB + 1 In 776.0 B + 2 df2 432.0 B + 3 df 304.0 B + 4 stats 232.0 B + 5 reduce_mem_usage_automatic 136.0 B + 6 total_size 136.0 B + 7 begin_operation 136.0 B + 8 end_operation 136.0 B + 9 
sizeof_fmt 136.0 B + """ if variables is None: variables = globals() From 5bf80d7ce52c55ba0d5237e55b3f81875954987c Mon Sep 17 00:00:00 2001 From: flych3r Date: Thu, 27 May 2021 11:07:14 -0300 Subject: [PATCH 04/56] clarified some examples --- pymove/utils/mem.py | 121 ++++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 71 deletions(-) diff --git a/pymove/utils/mem.py b/pymove/utils/mem.py index edeaf1ff..d14e74f5 100644 --- a/pymove/utils/mem.py +++ b/pymove/utils/mem.py @@ -23,11 +23,6 @@ from pymove.utils.log import logger -try: - pass -except ImportError: - pass - def reduce_mem_usage_automatic(df: DataFrame): """ @@ -40,22 +35,20 @@ def reduce_mem_usage_automatic(df: DataFrame): Examples -------- + >>> import numpy as np + >>> import pandas as pd >>> from pymove.utils.mem import reduce_mem_usage_automatic - >>> df - lat lon datetime id - 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984198 116.319322 2008-10-23 05:53:06 1 - 2 39.984224 116.319402 2008-10-23 05:53:11 1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 - 5 39.984710 116.319865 2008-10-23 05:53:23 1 - 6 39.984674 116.319810 2008-10-23 05:53:28 1 - 7 39.984623 116.319773 2008-10-23 05:53:33 1 - 8 39.984606 116.319732 2008-10-23 05:53:38 1 + >>> df = pd.DataFrame({'col_1': np.arange(10000, dtype=np.float64)}) + >>> df.dtytes + col_1 float64 + dtype: object >>> reduce_mem_usage_automatic(df) - Memory usage of dataframe is 0.00 MB - Memory usage after optimization is: 0.00 MB - Decreased by 26.0 % + 'Memory usage of dataframe is 0.08 MB' + 'Memory usage after optimization is: 0.02 MB' + 'Decreased by 74.9 %' + >>> df.dtytes + col_1 float16 + dtype: object """ start_mem = df.memory_usage().sum() / 1024 ** 2 logger.info('Memory usage of dataframe is {:.2f} MB'.format(start_mem)) @@ -158,20 +151,12 @@ def total_size( Examples -------- + >>> import numpy as np >>> from pymove.utils.mem import total_size - >>> df - lat lon 
datetime id - 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984198 116.319322 2008-10-23 05:53:06 1 - 2 39.984224 116.319402 2008-10-23 05:53:11 1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 - 5 39.984710 116.319865 2008-10-23 05:53:23 1 - 6 39.984674 116.319810 2008-10-23 05:53:28 1 - 7 39.984623 116.319773 2008-10-23 05:53:33 1 - 8 39.984606 116.319732 2008-10-23 05:53:38 1 - >>> total_size(df) - Size in bytes: 432, Type: + >>> arr = np.arange(10000, dtype=np.float64) + >>> sz = total_size(arr) + 'Size in bytes: 80104, Type: ' + >>> sz 432 """ if handlers is None: @@ -232,16 +217,14 @@ def begin_operation(name: Text) -> Dict: Examples -------- >>> from pymove.utils.mem import begin_operation - >>> print(begin_operation('move_data'), type(begin_operation('move_data'))) - {'process': psutil.Process(pid=103401, name='python', - status='running', started='21:48:11'), - 'init': 293732352, 'start': 1622082973.8825781, 'name': 'move_data'} - - >>> print(begin_operation('mdf'), type(begin_operation('mdf'))) - {'process': psutil.Process(pid=103401, name='python', - status='running', started='21:48:11'), - 'init': 293732352, 'start': 1622082973.8850513, 'name': 'mdf'} - + >>> operation = begin_operation('operation') + >>> operation + { + 'process': psutil.Process( + pid=103401, name='python', status='running', started='21:48:11' + ), + 'init': 293732352, 'start': 1622082973.8825781, 'name': 'operation' + } """ process = psutil.Process(os.getpid()) init = process.memory_info()[0] @@ -265,15 +248,14 @@ def end_operation(operation: Dict) -> Dict: Examples -------- - >>> from pymove.utils.mem import end_operation - >>> stats = {'process': psutil.Process(pid=103401, name='python', - status='running', started='21:48:11'), - 'init': 293732352, - 'start': 1622083075.4811873, - 'name': 'move_data'} - >>> print(end_operation(stats), type(end_operation(stats))) - {'name': 'move_data', 'time in seconds': 0.0014350414276123047, - 
'memory': '0.0 B'} + >>> import numpy as np + >>> import time + >>> from pymove.utils.mem import begin_operation, end_operation + >>> operation = begin_operation('create_arr') + >>> arr = np.arange(100000, dtype=np.float64) + >>> time.sleep(1.2) + >>> end_operation(operation) + {'name': 'create_arr', 'time in seconds': 1.2022554874420166, 'memory': '752.0 KiB'} """ finish = operation['process'].memory_info()[0] last_operation_name = operation['name'] @@ -305,8 +287,10 @@ def sizeof_fmt(mem_usage: int, suffix: Optional[Text] = 'B') -> Text: Examples -------- >>> from pymove.utils.mem import sizeof_fmt - >>> print(sizeof_fmt(6.64,'MB'), type(sizeof_fmt(6.64,'MB'))) - 6.6 MB + >>> sizeof_fmt(1024) + 1.0 KiB + >>> sizeof_fmt(2e6) + 1.9 MiB """ for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']: if abs(mem_usage) < 1024.0: @@ -316,15 +300,15 @@ def sizeof_fmt(mem_usage: int, suffix: Optional[Text] = 'B') -> Text: def top_mem_vars( - variables: Optional[Callable] = None, n: Optional[int] = 10, hide_private=True + variables: Callable, n: Optional[int] = 10, hide_private=True ) -> DataFrame: """ Shows the sizes of the active variables. 
Parameters ---------- - variables: locals() or globals(), optional - Whether to shows local or global variables, by default globals() + variables: locals() or globals() + Whether to shows local or global variables n: int, optional number of variables to show, by default hide_private: bool, optional @@ -336,23 +320,18 @@ def top_mem_vars( dataframe with variables names and sizes Examples -------- + >>> import numpy as np >>> from pymove.utils.mem import top_mem_vars - >>> print(top_mem_vars(globals()), type(top_mem_vars(globals()))) - var mem - 0 Out 1.1 KiB - 1 In 776.0 B - 2 df2 432.0 B - 3 df 304.0 B - 4 stats 232.0 B - 5 reduce_mem_usage_automatic 136.0 B - 6 total_size 136.0 B - 7 begin_operation 136.0 B - 8 end_operation 136.0 B - 9 sizeof_fmt 136.0 B - + >>> arr = np.arange(100000, dtype=np.float64) + >>> long_string = 'Hello World!' * 100 + >>> top_mem_vars(locals()) + var mem + 0 arr 781.4 KiB + 1 long_string 1.2 KiB + 2 local 416.0 B + 3 top_mem_vars 136.0 B + 4 np 72.0 B """ - if variables is None: - variables = globals() vars_ = ((name, getsizeof(value)) for name, value in variables.items()) if hide_private: vars_ = filter(lambda x: not x[0].startswith('_'), vars_) From 17be59f0441abd365a15853450263cca9a1d5ff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor?= Date: Thu, 27 May 2021 18:17:45 -0300 Subject: [PATCH 05/56] putting examples on trajectories module --- pymove/utils/trajectories.py | 108 +++++++++++++++++++++++++++++++---- 1 file changed, 98 insertions(+), 10 deletions(-) diff --git a/pymove/utils/trajectories.py b/pymove/utils/trajectories.py index 04415b45..7c5acaee 100644 --- a/pymove/utils/trajectories.py +++ b/pymove/utils/trajectories.py @@ -70,6 +70,17 @@ def read_csv( MoveDataFrameAbstract subclass Trajectory data + Examples + -------- + >>> from pymove.utils.trajectories import read_csv + >>> move_df = read_csv('...geolife_sample.csv') + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 
+ 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 """ data = _read_csv( filepath_or_buffer, @@ -95,6 +106,12 @@ def invert_dict(d: Dict) -> Dict: dict inverted dict + Examples + -------- + >>> from pymove.utils.trajectories import invert_dict + >>> traj_dict = {'lat':39.984094, 'lon':116.319236} + >>> invert_dict(traj_dict) + {39.984094: 'lat', 116.319236: 'lon'} """ return {v: k for k, v in d.items()} @@ -125,10 +142,10 @@ def flatten_dict( Examples -------- - >>> d = { 'a': 1, 'b': { 'c': 2, 'd': 3}} - >>> flatten_dict(d) - { 'a': 1, 'b_c': 2, 'b_d': 3 } - + >>> from pymove.utils.trajectories import flatten_dict + >>> traj_dict = {'lat':39.984094, 'lon':116.319236} + >>> flatten_dict(traj_dict, 'x') + {'x_lat': 39.984094, 'x_lon': 116.319236} """ if not isinstance(d, dict): return {parent_key: d} @@ -164,12 +181,28 @@ def flatten_columns(data: DataFrame, columns: List) -> DataFrame: Examples -------- - >>> d = {'a': 1, 'b': {'c': 2, 'd': 3}} - >>>> data = pd.DataFrame({'col1': [1], 'col2': [d]}) - >>>> flatten_columns(data, ['col2']) - col1 col2_b_d col2_a col2_b_c - 0 1 3 1 2 - + >>> from pymove.utils.trajectories import flatten_columns + >>> move_df + lat lon datetime id dict_column + 0 39.984094 116.319236 2008-10-23 05:53:05 1 {'a': 1} + 1 39.984198 116.319322 2008-10-23 05:53:06 1 {'b': 2} + 2 39.984224 116.319402 2008-10-23 05:53:11 1 {'c': 3} + 3 39.984211 116.319389 2008-10-23 05:53:16 1 {'d': 4} + 4 39.984217 116.319422 2008-10-23 05:53:21 1 {'e': 5} + >>> flatten_columns(moveDf, columns = 'dict_column') + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + + dict_column_b dict_column_d dict_column_e dict_column_a 
dict_column_c + 0 NaN NaN NaN 1.0 NaN + 1 2.0 NaN NaN NaN NaN + 2 NaN NaN NaN NaN 3.0 + 3 NaN 4.0 NaN NaN NaN + 4 NaN NaN 5.0 NaN NaN """ data = data.copy() if not isinstance(columns, list): @@ -218,6 +251,16 @@ def shift( ---------- https://stackoverflow.com/questions/30399534/shift-elements-in-a-numpy-array + Examples + -------- + >>> from pymove.utils.trajectories import shift + >>> array = [1,2,3,4,5,6,7] + >>> print(shift(array, 1), type(shift(array, 1))) + [0 1 2 3 4 5 6] + >>> print(shift(array, 2), type(shift(array, 2))) + [0 0 1 2 3 4 5] + >>> print(shift(array, 3), type(shift(array, 3))) + [0 0 0 1 2 3 4] """ result = np.empty_like(arr) if fill_value is None: @@ -254,6 +297,18 @@ def fill_list_with_new_values(original_list: List, new_list_values: List): new_list_values : list. The list from which elements will be copied + Returns + ------- + The original list with the content of a secondary list + + Example + ------- + >>> from pymove.utils.trajectories import fill_list_with_new_values + >>> original_list = [4,3,2,1,0] + >>> new_list = [9,8,7,6,5] + >>> fill_list_with_new_values(original_list, new_list) + >>> print(original_list, type(original_list)) + ['oveD'] [9, 8, 7, 6, 5] """ n = len(new_list_values) original_list[:n] = new_list_values @@ -272,6 +327,16 @@ def object_for_array(object_: Text) -> ndarray: ------- array object converted to a list + + Examples + -------- + >>> from pymove.utils.trajectories import object_for_array + >>> print(object_for_array('lat'), type(object_for_array('lat'))) + ['a'] + >>> print(object_for_array('move'), type(object_for_array('move'))) + ['ov'] + >>> print(object_for_array('moveDf'), type(object_for_array('moveDf'))) + ['oveD'] """ if object_ is None: return object_ @@ -295,6 +360,29 @@ def column_to_array(data: DataFrame, column: Text): column : str Label of data referring to the column for conversion + + Returns + ------- + dataframe + Dataframe with the new column... 
+ + Example + ------- + >>> from pymove.utils.trajectories import column_to_array + >>> move_df + lat lon datetime id list_column + 0 39.984094 116.319236 2008-10-23 05:53:05 1 [1,2] + 1 39.984198 116.319322 2008-10-23 05:53:06 1 [3,4] + 2 39.984224 116.319402 2008-10-23 05:53:11 1 [5,6] + 3 39.984211 116.319389 2008-10-23 05:53:16 1 [7,8] + 4 39.984217 116.319422 2008-10-23 05:53:21 1 [9,10] + >>> column_to_array(moveDf, column = 'list_column') + lat lon datetime id list_column + 0 39.984094 116.319236 2008-10-23 05:53:05 1 [1.0,2.0] + 1 39.984198 116.319322 2008-10-23 05:53:06 1 [3.0,4.0] + 2 39.984224 116.319402 2008-10-23 05:53:11 1 [5.0,6.0] + 3 39.984211 116.319389 2008-10-23 05:53:16 1 [7.0,8.0] + 4 39.984217 116.319422 2008-10-23 05:53:21 1 [9.0,10.0] """ data = data.copy() if column not in data: From 04b62a0089ca37e2057d566edc62fb017ed2a0ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor?= Date: Thu, 27 May 2021 21:31:30 -0300 Subject: [PATCH 06/56] putting examples on trajectories module --- pymove/utils/trajectories.py | 40 +++++++++++++++++------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/pymove/utils/trajectories.py b/pymove/utils/trajectories.py index 7c5acaee..f3da3a74 100644 --- a/pymove/utils/trajectories.py +++ b/pymove/utils/trajectories.py @@ -73,7 +73,7 @@ def read_csv( Examples -------- >>> from pymove.utils.trajectories import read_csv - >>> move_df = read_csv('...geolife_sample.csv') + >>> move_df = read_csv('geolife_sample.csv') >>> move_df.head() lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 @@ -109,9 +109,9 @@ def invert_dict(d: Dict) -> Dict: Examples -------- >>> from pymove.utils.trajectories import invert_dict - >>> traj_dict = {'lat':39.984094, 'lon':116.319236} + >>> traj_dict = {'a': 1, 'b': 2} >>> invert_dict(traj_dict) - {39.984094: 'lat', 116.319236: 'lon'} + {1: 'a, 2: 'b} """ return {v: k for k, v in d.items()} @@ -143,9 +143,9 @@ def flatten_dict( Examples 
-------- >>> from pymove.utils.trajectories import flatten_dict - >>> traj_dict = {'lat':39.984094, 'lon':116.319236} + >>> d = {'a': 1, 'b': {'c': 2, 'd': 3}} >>> flatten_dict(traj_dict, 'x') - {'x_lat': 39.984094, 'x_lon': 116.319236} + {'x_a': 1, 'x_b_c': 2, 'x_b_d': 3} """ if not isinstance(d, dict): return {parent_key: d} @@ -257,10 +257,10 @@ def shift( >>> array = [1,2,3,4,5,6,7] >>> print(shift(array, 1), type(shift(array, 1))) [0 1 2 3 4 5 6] - >>> print(shift(array, 2), type(shift(array, 2))) - [0 0 1 2 3 4 5] - >>> print(shift(array, 3), type(shift(array, 3))) - [0 0 0 1 2 3 4] + >>> print(shift(array, 0), type(shift(array, 0))) + [1, 2, 3, 4, 5, 6, 7] + >>> print(shift(array, -1), type(shift(array, -1))) + [2 3 4 5 6 7 0] """ result = np.empty_like(arr) if fill_value is None: @@ -304,11 +304,10 @@ def fill_list_with_new_values(original_list: List, new_list_values: List): Example ------- >>> from pymove.utils.trajectories import fill_list_with_new_values - >>> original_list = [4,3,2,1,0] - >>> new_list = [9,8,7,6,5] - >>> fill_list_with_new_values(original_list, new_list) - >>> print(original_list, type(original_list)) - ['oveD'] [9, 8, 7, 6, 5] + >>> lt = [1, 2, 3, 4] + >>> fill_list_with_new_values(lt, ['a','b']) + >>> print(lt, type(lt)) + ['a', 'b', 3, 4] """ n = len(new_list_values) original_list[:n] = new_list_values @@ -331,12 +330,11 @@ def object_for_array(object_: Text) -> ndarray: Examples -------- >>> from pymove.utils.trajectories import object_for_array - >>> print(object_for_array('lat'), type(object_for_array('lat'))) - ['a'] - >>> print(object_for_array('move'), type(object_for_array('move'))) - ['ov'] - >>> print(object_for_array('moveDf'), type(object_for_array('moveDf'))) - ['oveD'] + >>> list_str = '[1,2,3,4,5]' + >>> object_for_array(list_str) + array([1., 2., 3., 4., 5.], dtype=float32) + >>> print(type(object_for_array(list_str))) + """ if object_ is None: return object_ @@ -364,7 +362,7 @@ def column_to_array(data: DataFrame, 
column: Text): Returns ------- dataframe - Dataframe with the new column... + Dataframe with the selected column converted to list Example ------- From ccda986bd3e95c53e32fa41d726bb19918e4b810 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor?= Date: Thu, 27 May 2021 23:17:06 -0300 Subject: [PATCH 07/56] putting examples on trajectories module --- pymove/utils/trajectories.py | 71 ++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/pymove/utils/trajectories.py b/pymove/utils/trajectories.py index f3da3a74..01fe8575 100644 --- a/pymove/utils/trajectories.py +++ b/pymove/utils/trajectories.py @@ -75,12 +75,12 @@ def read_csv( >>> from pymove.utils.trajectories import read_csv >>> move_df = read_csv('geolife_sample.csv') >>> move_df.head() - lat lon datetime id - 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984198 116.319322 2008-10-23 05:53:06 1 - 2 39.984224 116.319402 2008-10-23 05:53:11 1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 """ data = _read_csv( filepath_or_buffer, @@ -144,8 +144,8 @@ def flatten_dict( -------- >>> from pymove.utils.trajectories import flatten_dict >>> d = {'a': 1, 'b': {'c': 2, 'd': 3}} - >>> flatten_dict(traj_dict, 'x') - {'x_a': 1, 'x_b_c': 2, 'x_b_d': 3} + >>> d + {'a': 1, 'b': {'c': 2, 'd': 3}} """ if not isinstance(d, dict): return {parent_key: d} @@ -183,26 +183,25 @@ def flatten_columns(data: DataFrame, columns: List) -> DataFrame: -------- >>> from pymove.utils.trajectories import flatten_columns >>> move_df - lat lon datetime id dict_column - 0 39.984094 116.319236 2008-10-23 05:53:05 1 {'a': 1} - 1 39.984198 116.319322 2008-10-23 05:53:06 1 {'b': 2} - 2 39.984224 
116.319402 2008-10-23 05:53:11 1 {'c': 3} - 3 39.984211 116.319389 2008-10-23 05:53:16 1 {'d': 4} - 4 39.984217 116.319422 2008-10-23 05:53:21 1 {'e': 5} - >>> flatten_columns(moveDf, columns = 'dict_column') - lat lon datetime id - 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984198 116.319322 2008-10-23 05:53:06 1 - 2 39.984224 116.319402 2008-10-23 05:53:11 1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 - - dict_column_b dict_column_d dict_column_e dict_column_a dict_column_c - 0 NaN NaN NaN 1.0 NaN - 1 2.0 NaN NaN NaN NaN - 2 NaN NaN NaN NaN 3.0 - 3 NaN 4.0 NaN NaN NaN - 4 NaN NaN 5.0 NaN NaN + lat lon datetime id dict_column + 0 39.984094 116.319236 2008-10-23 05:53:05 1 {'a': 1} + 1 39.984198 116.319322 2008-10-23 05:53:06 1 {'b': 2} + 2 39.984224 116.319402 2008-10-23 05:53:11 1 {'c': 3, 'a': 4} + 3 39.984211 116.319389 2008-10-23 05:53:16 1 {'b': 2} + 4 39.984217 116.319422 2008-10-23 05:53:21 1 {'a': 3, 'c': 2} + >>> flatten_columns(move_df, columns='dict_column') + lat lon datetime id + dict_column_b dict_column_c dict_column_a + 0 39.984094 116.319236 2008-10-23 05:53:05 1\ + NaN NaN 1.0 + 1 39.984198 116.319322 2008-10-23 05:53:06 1\ + 2.0 NaN NaN + 2 39.984224 116.319402 2008-10-23 05:53:11 1\ + NaN 3.0 4.0 + 3 39.984211 116.319389 2008-10-23 05:53:16 1\ + 2.0 NaN NaN + 4 39.984217 116.319422 2008-10-23 05:53:21 1\ + NaN 2.0 3.0 """ data = data.copy() if not isinstance(columns, list): @@ -370,17 +369,17 @@ def column_to_array(data: DataFrame, column: Text): >>> move_df lat lon datetime id list_column 0 39.984094 116.319236 2008-10-23 05:53:05 1 [1,2] - 1 39.984198 116.319322 2008-10-23 05:53:06 1 [3,4] - 2 39.984224 116.319402 2008-10-23 05:53:11 1 [5,6] - 3 39.984211 116.319389 2008-10-23 05:53:16 1 [7,8] - 4 39.984217 116.319422 2008-10-23 05:53:21 1 [9,10] + 1 39.984198 116.319322 2008-10-23 05:53:06 1 [3,4] + 2 39.984224 116.319402 2008-10-23 05:53:11 1 [5,6] + 3 39.984211 116.319389 
2008-10-23 05:53:16 1 [7,8] + 4 39.984217 116.319422 2008-10-23 05:53:21 1 [9,10] >>> column_to_array(moveDf, column = 'list_column') lat lon datetime id list_column 0 39.984094 116.319236 2008-10-23 05:53:05 1 [1.0,2.0] - 1 39.984198 116.319322 2008-10-23 05:53:06 1 [3.0,4.0] - 2 39.984224 116.319402 2008-10-23 05:53:11 1 [5.0,6.0] - 3 39.984211 116.319389 2008-10-23 05:53:16 1 [7.0,8.0] - 4 39.984217 116.319422 2008-10-23 05:53:21 1 [9.0,10.0] + 1 39.984198 116.319322 2008-10-23 05:53:06 1 [3.0,4.0] + 2 39.984224 116.319402 2008-10-23 05:53:11 1 [5.0,6.0] + 3 39.984211 116.319389 2008-10-23 05:53:16 1 [7.0,8.0] + 4 39.984217 116.319422 2008-10-23 05:53:21 1 [9.0,10.0] """ data = data.copy() if column not in data: From 65f6ce424468e217e08f030c21920db6b442b66a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor?= Date: Fri, 28 May 2021 07:21:33 -0300 Subject: [PATCH 08/56] putting examples on trajectories module --- pymove/utils/trajectories.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymove/utils/trajectories.py b/pymove/utils/trajectories.py index 01fe8575..e532326b 100644 --- a/pymove/utils/trajectories.py +++ b/pymove/utils/trajectories.py @@ -144,8 +144,8 @@ def flatten_dict( -------- >>> from pymove.utils.trajectories import flatten_dict >>> d = {'a': 1, 'b': {'c': 2, 'd': 3}} - >>> d - {'a': 1, 'b': {'c': 2, 'd': 3}} + >>> flatten_dict(d) + {'a': 1, 'b_c': 2, 'b_d': 3} """ if not isinstance(d, dict): return {parent_key: d} From 74b63ae5648eb5b6db9c04de6e1a2045649078fd Mon Sep 17 00:00:00 2001 From: flych3r Date: Fri, 28 May 2021 11:09:56 -0300 Subject: [PATCH 09/56] return --- pymove/utils/trajectories.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pymove/utils/trajectories.py b/pymove/utils/trajectories.py index 7c5acaee..b9d0f09f 100644 --- a/pymove/utils/trajectories.py +++ b/pymove/utils/trajectories.py @@ -36,7 +36,7 @@ def read_csv( type_: Optional[Text] = TYPE_PANDAS, 
n_partitions: Optional[int] = 1, **kwargs -): +) -> MoveDataFrame: """ Reads a `csv` file and structures the data. @@ -349,7 +349,7 @@ def object_for_array(object_: Text) -> ndarray: return conv.astype('object_') -def column_to_array(data: DataFrame, column: Text): +def column_to_array(data: DataFrame, column: Text) -> DataFrame: """ Transforms all columns values to list. @@ -377,12 +377,12 @@ def column_to_array(data: DataFrame, column: Text): 3 39.984211 116.319389 2008-10-23 05:53:16 1 [7,8] 4 39.984217 116.319422 2008-10-23 05:53:21 1 [9,10] >>> column_to_array(moveDf, column = 'list_column') - lat lon datetime id list_column - 0 39.984094 116.319236 2008-10-23 05:53:05 1 [1.0,2.0] - 1 39.984198 116.319322 2008-10-23 05:53:06 1 [3.0,4.0] - 2 39.984224 116.319402 2008-10-23 05:53:11 1 [5.0,6.0] - 3 39.984211 116.319389 2008-10-23 05:53:16 1 [7.0,8.0] - 4 39.984217 116.319422 2008-10-23 05:53:21 1 [9.0,10.0] + lat lon datetime id list_column + 0 39.984094 116.319236 2008-10-23 05:53:05 1 [1.0,2.0] + 1 39.984198 116.319322 2008-10-23 05:53:06 1 [3.0,4.0] + 2 39.984224 116.319402 2008-10-23 05:53:11 1 [5.0,6.0] + 3 39.984211 116.319389 2008-10-23 05:53:16 1 [7.0,8.0] + 4 39.984217 116.319422 2008-10-23 05:53:21 1 [9.0,10.0] """ data = data.copy() if column not in data: From 64580009692821507a36e14439dc06d544b6c7a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor?= Date: Mon, 31 May 2021 23:58:41 -0300 Subject: [PATCH 10/56] correcting formation wrongs --- pymove/utils/datetime.py | 81 +++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/pymove/utils/datetime.py b/pymove/utils/datetime.py index 4c6835fc..d4b71445 100644 --- a/pymove/utils/datetime.py +++ b/pymove/utils/datetime.py @@ -524,17 +524,17 @@ def create_time_slot_in_minute( >>> from pymove.utils.datetime import create_time_slot_in_minute >>> from pymove import datetime >>> data - lat lon datetime id - 0 39.984094 116.319236 2008-10-23 05:44:05 1 
- 1 39.984198 116.319322 2008-10-23 05:56:06 1 - 2 39.984224 116.319402 2008-10-23 05:56:11 1 - 3 39.984224 116.319402 2008-10-23 06:10:15 1 + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:44:05 1 + 1 39.984198 116.319322 2008-10-23 05:56:06 1 + 2 39.984224 116.319402 2008-10-23 05:56:11 1 + 3 39.984224 116.319402 2008-10-23 06:10:15 1 >>> datetime.create_time_slot_in_minute(data, inplace=False) - lat lon datetime id time_slot - 0 39.984094 116.319236 2008-10-23 05:44:05 1 22 - 1 39.984198 116.319322 2008-10-23 05:56:06 1 23 - 2 39.984224 116.319402 2008-10-23 05:56:11 1 23 - 3 39.984224 116.319402 2008-10-23 06:10:15 1 24 + lat lon datetime id time_slot + 0 39.984094 116.319236 2008-10-23 05:44:05 1 22 + 1 39.984198 116.319322 2008-10-23 05:56:06 1 23 + 2 39.984224 116.319402 2008-10-23 05:56:11 1 23 + 3 39.984224 116.319402 2008-10-23 06:10:15 1 24 """ if data.dtypes[label_datetime] != 'datetime64[ns]': raise ValueError('{} colum must be of type datetime'.format(label_datetime)) @@ -572,17 +572,21 @@ def generate_time_statistics( ------- >>> from pymove.utils.datetime import generate_time_statistics >>> df - local_label prev_local time_to_prev id - 0 house NaN NaN 1 - 1 market house 720.0 1 - 2 market market 5.0 1 - 3 market market 1.0 1 - 4 school market 844.0 1 + local_label prev_local time_to_prev id + 0 house NaN NaN 1 + 1 market house 720.0 1 + 2 market market 5.0 1 + 3 market market 1.0 1 + 4 school market 844.0 1 >>> generate_time_statistics(df) - local_label prev_local mean std min max sum count - 0 house market 844.0 0.000000 844.0 844.0 844.0 1 - 1 market house 720.0 0.000000 720.0 720.0 720.0 1 - 2 market market 3.0 2.828427 1.0 5.0 6.0 2 + local_label prev_local mean std \ + min max sum count + 0 house market 844.0 0.000000 \ + 844.0 844.0 844.0 1 + 1 market house 720.0 0.000000 \ + 720.0 720.0 720.0 1 + 2 market market 3.0 2.828427 \ + 1.0 5.0 6.0 2 """ df_statistics = data.groupby( [local_label, PREV_LOCAL] @@ -625,7 +629,6 @@ def 
_calc_time_threshold(seg_mean: float, seg_std: float) -> float: 2.5 >>> print(_calc_time_threshold(-2,2)) 0.0 - """ threshold = seg_std + seg_mean threshold = float('{:.1f}'.format(threshold)) @@ -665,27 +668,27 @@ def threshold_time_statistics( ------- >>> from pymove.utils.datetime import generate_time_statistics >>> df - local_label prev_local time_to_prev id - 0 house NaN NaN 1 - 1 market house 720.0 1 - 2 market market 5.0 1 - 3 market market 1.0 1 - 4 school market 844.0 1 + local_label prev_local time_to_prev id + 0 house NaN NaN 1 + 1 market house 720.0 1 + 2 market market 5.0 1 + 3 market market 1.0 1 + 4 school market 844.0 1 >>> statistics = generate_time_statistics(df) >>> statistics - local_label prev_local mean std min max sum count - 0 house market 844.0 0.000000 844.0 844.0 844.0 1 - 1 market house 720.0 0.000000 720.0 720.0 720.0 1 - 2 market market 3.0 2.828427 1.0 5.0 6.0 2 + local_label prev_local mean std min max sum count + 0 house market 844.0 0.000000 844.0 844.0 844.0 1 + 1 market house 720.0 0.000000 720.0 720.0 720.0 1 + 2 market market 3.0 2.828427 1.0 5.0 6.0 2 >>> threshold_time_statistics(statistics) - local_label prev_local mean std min max sum count - 0 house market 844.0 0.000000 844.0 844.0 844.0 1 - 1 market house 720.0 0.000000 720.0 720.0 720.0 1 - 2 market market 3.0 2.828427 1.0 5.0 6.0 2 - threshold - 0 844.0 - 1 720.0 - 2 5.8 + local_label prev_local mean std min \ + max sum count threshold + 0 house market 844.0 0.000000 844.0 \ + 844.0 844.0 1 844.0 + 1 market house 720.0 0.000000 720.0 \ + 720.0 720.0 1 720.0 + 2 market market 3.0 2.828427 1.0 \ + 5.0 6.0 2 5.8 """ if not inplace: df_statistics = df_statistics.copy() From 44b60b122ceed3c7a1315f2cc41926edd4b5f22f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor?= Date: Tue, 1 Jun 2021 20:52:53 -0300 Subject: [PATCH 11/56] putting examples on visual module --- pymove/utils/visual.py | 56 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 50 
insertions(+), 6 deletions(-) diff --git a/pymove/utils/visual.py b/pymove/utils/visual.py index 0bc91efc..77a31ca9 100644 --- a/pymove/utils/visual.py +++ b/pymove/utils/visual.py @@ -146,6 +146,14 @@ def generate_color() -> Text: Returns ------- Random HEX color + + Examples + -------- + >>> from pymove.utils.visual import generate_color + >>> print(generate_color(), type(generate_color())) + #E0FFFF + >>> print(generate_color(), type(generate_color())) + #808000 """ return COLORS[randint(0, len(COLORS))] @@ -167,9 +175,11 @@ def rgb(rgb_colors: Tuple[float, float, float]) -> Tuple[int, int, int]: Examples -------- - >>> from pymove.visualization.visualization import rgb - >>> rgb([0.6,0.2,0.2]) - (51, 51, 153) + >>> from pymove.utils.visual import rgb + >>> print(rgb((0.1,0.2,0.7)), type(rgb((0.1,0.2,0.7)))) + (51, 178, 25) + >>> print(rgb((0.5,0.4,0.1)), type(rgb((0.5,0.4,0.1)))) + (102, 25, 127) """ blue = rgb_colors[0] red = rgb_colors[1] @@ -194,9 +204,11 @@ def hex_rgb(rgb_colors: Tuple[float, float, float]) -> Text: Examples -------- - >>> from pymove.visualization.visualization import hex_rgb - >>> hex_rgb([0.6,0.2,0.2]) - '#333399' + >>> from pymove.utils.visual import hex_rgb + >>> print(hex_rgb((0.1,0.2,0.7)), type(hex_rgb((0.1,0.2,0.7)))) + #33B219 + >>> print(hex_rgb((0.5,0.4,0.1)), type(hex_rgb((0.5,0.4,0.1)))) + #66197F """ return '#%02X%02X%02X' % rgb(rgb_colors) @@ -216,6 +228,16 @@ def cmap_hex_color(cmap: ListedColormap, i: int) -> Text: ------- str Represents corresponding hex str + + Examples + -------- + >>> from pymove.utils.visual import cmap_hex_color + >>> # import matplotlib.pyplot as plt + >>> # jet = plt.get_cmap('jet') // It comand generates a Linear Segmented Colormap + >>> print(cmap_hex_color(jet,0), type(cmap_hex_color(jet,0))) + #000080 + >>> print(cmap_hex_color(jet,1), type(cmap_hex_color(jet,1))) + #000084 """ return rgb2hex(cmap(i)) @@ -233,6 +255,13 @@ def get_cmap(cmap: Text) -> Colormap: ------- Colormap matplotlib 
colormap + + Examples + -------- + >>> from pymove.utils.visual import get_cmap + >>> print(get_cmap('Greys'), type(get_cmap('Greys'))) + + """ return _get_cmap(cmap) @@ -252,6 +281,21 @@ def save_wkt( label_id : str Represents column name of trajectory id + Returns + ------- + File: A file.wkt that contains geometric points that build a map visualization + + Examples + -------- + >>> from pymove.utils.visual import save_wkt + >>> df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> save_wkt(df, 'test', 'id') """ str_ = '%s;linestring\n' % label_id ids = move_data[label_id].unique() From 0960e0f9d59afce5a82c95060067c3a57122d63a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor?= Date: Tue, 1 Jun 2021 22:24:41 -0300 Subject: [PATCH 12/56] putting examples on visual module --- pymove/utils/visual.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pymove/utils/visual.py b/pymove/utils/visual.py index 77a31ca9..808202ab 100644 --- a/pymove/utils/visual.py +++ b/pymove/utils/visual.py @@ -149,11 +149,11 @@ def generate_color() -> Text: Examples -------- - >>> from pymove.utils.visual import generate_color + >>> from pymove.utils.visual import generate_color >>> print(generate_color(), type(generate_color())) - #E0FFFF + '#E0FFFF' >>> print(generate_color(), type(generate_color())) - #808000 + '#808000' """ return COLORS[randint(0, len(COLORS))] @@ -175,10 +175,10 @@ def rgb(rgb_colors: Tuple[float, float, float]) -> Tuple[int, int, int]: Examples -------- - >>> from pymove.utils.visual import rgb - >>> print(rgb((0.1,0.2,0.7)), type(rgb((0.1,0.2,0.7)))) + >>> from pymove.utils.visual import rgb + >>> print(rgb((0.1, 0.2, 0.7)), type(rgb((0.1, 0.2, 0.7)))) (51, 178, 25) - >>> 
print(rgb((0.5,0.4,0.1)), type(rgb((0.5,0.4,0.1)))) + >>> print(rgb((0.5, 0.4, 0.1)), type(rgb((0.5, 0.4, 0.1)))) (102, 25, 127) """ blue = rgb_colors[0] @@ -205,10 +205,10 @@ def hex_rgb(rgb_colors: Tuple[float, float, float]) -> Text: Examples -------- >>> from pymove.utils.visual import hex_rgb - >>> print(hex_rgb((0.1,0.2,0.7)), type(hex_rgb((0.1,0.2,0.7)))) - #33B219 - >>> print(hex_rgb((0.5,0.4,0.1)), type(hex_rgb((0.5,0.4,0.1)))) - #66197F + >>> print(hex_rgb((0.1, 0.2, 0.7)), type(hex_rgb((0.1, 0.2, 0.7)))) + '#33B219' + >>> print(hex_rgb((0.5, 0.4, 0.1)), type(hex_rgb((0.5, 0.4, 0.1)))) + '#66197F' """ return '#%02X%02X%02X' % rgb(rgb_colors) @@ -233,11 +233,11 @@ def cmap_hex_color(cmap: ListedColormap, i: int) -> Text: -------- >>> from pymove.utils.visual import cmap_hex_color >>> # import matplotlib.pyplot as plt - >>> # jet = plt.get_cmap('jet') // It comand generates a Linear Segmented Colormap + >>> # jet = plt.get_cmap('jet') // This comand generates a Linear Segmented Colormap >>> print(cmap_hex_color(jet,0), type(cmap_hex_color(jet,0))) - #000080 + '#000080' >>> print(cmap_hex_color(jet,1), type(cmap_hex_color(jet,1))) - #000084 + '#000084' """ return rgb2hex(cmap(i)) @@ -295,7 +295,7 @@ def save_wkt( 2 39.984224 116.319402 2008-10-23 05:53:11 1 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> save_wkt(df, 'test', 'id') + >>> save_wkt(df, 'test.wkt', 'id') """ str_ = '%s;linestring\n' % label_id ids = move_data[label_id].unique() From cb069cf0b82aaf7c44780bb4da0b34dd6a1fdd6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor=20Carvalho?= Date: Tue, 1 Jun 2021 22:27:02 -0300 Subject: [PATCH 13/56] Update visual.py --- pymove/utils/visual.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pymove/utils/visual.py b/pymove/utils/visual.py index 808202ab..12d3211a 100644 --- a/pymove/utils/visual.py +++ b/pymove/utils/visual.py @@ -234,10 +234,10 @@ def 
cmap_hex_color(cmap: ListedColormap, i: int) -> Text: >>> from pymove.utils.visual import cmap_hex_color >>> # import matplotlib.pyplot as plt >>> # jet = plt.get_cmap('jet') // This comand generates a Linear Segmented Colormap - >>> print(cmap_hex_color(jet,0), type(cmap_hex_color(jet,0))) - '#000080' - >>> print(cmap_hex_color(jet,1), type(cmap_hex_color(jet,1))) - '#000084' + >>> print(cmap_hex_color(jet,0)) + '#000080' + >>> print(cmap_hex_color(jet,1)) + '#000084' """ return rgb2hex(cmap(i)) From a294f9c9e107de85878265d6ac84078bd3d75315 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor=20Carvalho?= Date: Tue, 1 Jun 2021 22:45:35 -0300 Subject: [PATCH 14/56] Update datetime.py --- pymove/utils/datetime.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pymove/utils/datetime.py b/pymove/utils/datetime.py index d4b71445..523ffdac 100644 --- a/pymove/utils/datetime.py +++ b/pymove/utils/datetime.py @@ -623,11 +623,11 @@ def _calc_time_threshold(seg_mean: float, seg_std: float) -> float: Examples -------- >>> from pymove.utils.datetime import _calc_time_threshold - >>> print(_calc_time_threshold(12.3,2.1)) + >>> print(_calc_time_threshold(12.3, 2.1)) 14.4 - >>> print(_calc_time_threshold(1,1.5)) + >>> print(_calc_time_threshold(1, 1.5)) 2.5 - >>> print(_calc_time_threshold(-2,2)) + >>> print(_calc_time_threshold(-2, 2)) 0.0 """ threshold = seg_std + seg_mean From 672c4406ecb6b361b9502298afb180ea0f1dffc2 Mon Sep 17 00:00:00 2001 From: flych3r Date: Wed, 2 Jun 2021 11:31:29 -0300 Subject: [PATCH 15/56] added add_map_legend example --- pymove/utils/visual.py | 91 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 79 insertions(+), 12 deletions(-) diff --git a/pymove/utils/visual.py b/pymove/utils/visual.py index 12d3211a..2599960d 100644 --- a/pymove/utils/visual.py +++ b/pymove/utils/visual.py @@ -40,6 +40,69 @@ def add_map_legend(m: Map, title: Text, items: List[Tuple]): ---------- 
https://github.com/python-visualization/folium/issues/528#issuecomment-421445303 + Examples + -------- + >>> import folium + >>> from pymove.utils.visual import add_map_legend + >>> df + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 2 + 4 39.984217 116.319422 2008-10-23 05:53:21 2 + >>> m = folium.Map(location=[df.lat.median(), df.lon.median()]) + >>> folium.PolyLine(mdf[['lat', 'lon']], color='red').add_to(m) + >>> pm.visual.add_map_legend(m, 'Color by ID', [(1, 'red')]) + >>> m.get_root().to_dict() + { + "name": "Figure", + "id": "1d32230cd6c54b19b35ceaa864e61168", + "children": { + "map_6f1abc8eacee41e8aa9d163e6bbb295f": { + "name": "Map", + "id": "6f1abc8eacee41e8aa9d163e6bbb295f", + "children": { + "openstreetmap": { + "name": "TileLayer", + "id": "f58c3659fea348cb828775f223e1e6a4", + "children": {} + }, + "poly_line_75023fd7df01475ea5e5606ddd7f4dd2": { + "name": "PolyLine", + "id": "75023fd7df01475ea5e5606ddd7f4dd2", + "children": {} + } + } + }, + "map_legend": { # legend element + "name": "MacroElement", + "id": "72911b4418a94358ba8790aab93573d1", + "children": {} + } + }, + "header": { + "name": "Element", + "id": "e46930fc4152431090b112424b5beb6a", + "children": { + "meta_http": { + "name": "Element", + "id": "868e20baf5744e82baf8f13a06849ecc", + "children": {} + } + } + }, + "html": { + "name": "Element", + "id": "9c4da9e0aac349f594e2d23298bac171", + "children": {} + }, + "script": { + "name": "Element", + "id": "d092078607c04076bf58bd4593fa1684", + "children": {} + } + } """ item = "
  • %s
  • " list_items = '\n'.join([item % (c, n) for (n, c) in items]) @@ -136,7 +199,7 @@ def add_map_legend(m: Map, title: Text, items: List[Tuple]): macro = MacroElement() macro._template = Template(template) - m.get_root().add_child(macro) + m.get_root().add_child(macro, name='map_legend') def generate_color() -> Text: @@ -232,11 +295,11 @@ def cmap_hex_color(cmap: ListedColormap, i: int) -> Text: Examples -------- >>> from pymove.utils.visual import cmap_hex_color - >>> # import matplotlib.pyplot as plt - >>> # jet = plt.get_cmap('jet') // This comand generates a Linear Segmented Colormap - >>> print(cmap_hex_color(jet,0)) + >>> import matplotlib.pyplot as plt + >>> jet = plt.get_cmap('jet') # This comand generates a Linear Segmented Colormap + >>> print(cmap_hex_color(jet, 0)) '#000080' - >>> print(cmap_hex_color(jet,1)) + >>> print(cmap_hex_color(jet, 1)) '#000084' """ return rgb2hex(cmap(i)) @@ -259,9 +322,8 @@ def get_cmap(cmap: Text) -> Colormap: Examples -------- >>> from pymove.utils.visual import get_cmap - >>> print(get_cmap('Greys'), type(get_cmap('Greys'))) + >>> print(get_cmap('Greys') - """ return _get_cmap(cmap) @@ -293,11 +355,16 @@ def save_wkt( 0 39.984094 116.319236 2008-10-23 05:53:05 1 1 39.984198 116.319322 2008-10-23 05:53:06 1 2 39.984224 116.319402 2008-10-23 05:53:11 1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 2 + 4 39.984217 116.319422 2008-10-23 05:53:21 2 >>> save_wkt(df, 'test.wkt', 'id') + >>> with open('test.wtk') as f: + >>> print(f.read()) + 'id;linestring' + '1;LINESTRING(116.319236 39.984094,116.319322 39.984198,116.319402 39.984224)' + '2;LINESTRING(116.319389 39.984211,116.319422 39.984217)' """ - str_ = '%s;linestring\n' % label_id + wtk = '%s;linestring\n' % label_id ids = move_data[label_id].unique() for id_ in ids: move_df = move_data[move_data[label_id] == id_] @@ -307,6 +374,6 @@ def save_wkt( for x in move_df[[LONGITUDE, 
LATITUDE]].values ) curr_str += ')\n' - str_ += curr_str + wtk += curr_str with open(filename, 'w') as f: - f.write(str_) + f.write(wtk) From c16b21fc56816f79cdc7534d087abae182d19a14 Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 8 Jun 2021 15:54:15 -0300 Subject: [PATCH 16/56] added mypy --- .pre-commit-config.yaml | 4 + pymove/core/dask.py | 146 ++++++------ pymove/core/dataframe.py | 16 +- pymove/core/grid.py | 41 ++-- pymove/core/interface.py | 2 +- pymove/core/pandas.py | 202 +++++++++-------- pymove/core/pandas_discrete.py | 43 ++-- pymove/models/pattern_mining/clustering.py | 24 +- pymove/preprocessing/compression.py | 18 +- pymove/preprocessing/filters.py | 94 ++++---- pymove/preprocessing/segmentation.py | 48 ++-- pymove/preprocessing/stay_point_detection.py | 18 +- pymove/query/query.py | 26 +-- pymove/semantic/semantic.py | 84 +++---- pymove/tests/test_utils_mem.py | 10 +- pymove/utils/conversions.py | 65 +++--- pymove/utils/data_augmentation.py | 30 +-- pymove/utils/datetime.py | 24 +- pymove/utils/distances.py | 29 +-- pymove/utils/geoutils.py | 16 +- pymove/utils/integration.py | 115 ++++++---- pymove/utils/log.py | 4 +- pymove/utils/math.py | 6 +- pymove/utils/mem.py | 17 +- pymove/utils/trajectories.py | 24 +- pymove/utils/visual.py | 4 +- pymove/visualization/folium.py | 224 +++++++++---------- pymove/visualization/matplotlib.py | 56 ++--- setup.cfg | 4 + 29 files changed, 732 insertions(+), 662 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7546e5d7..34dc62dd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,3 +32,7 @@ repos: 'flake8-docstrings==1.6.0', 'pep8-naming==0.11.1' ] +# - repo: https://github.com/pre-commit/mirrors-mypy +# rev: 'v0.812' +# hooks: +# - id: mypy diff --git a/pymove/core/dask.py b/pymove/core/dask.py index d6a68088..99f654de 100644 --- a/pymove/core/dask.py +++ b/pymove/core/dask.py @@ -1,6 +1,6 @@ """DaskMoveDataFrame class.""" -from typing import TYPE_CHECKING, 
Dict, List, Optional, Text, Union +from typing import TYPE_CHECKING, Dict, List, Text, Union import dask import numpy as np @@ -28,11 +28,11 @@ class DaskMoveDataFrame(DataFrame, MoveDataFrameAbstractModel): def __init__( self, data: Union[DataFrame, List, Dict], - latitude: Optional[Text] = LATITUDE, - longitude: Optional[Text] = LONGITUDE, - datetime: Optional[Text] = DATETIME, - traj_id: Optional[Text] = TRAJ_ID, - n_partitions: Optional[int] = 1, + latitude: Text = LATITUDE, + longitude: Text = LONGITUDE, + datetime: Text = DATETIME, + traj_id: Text = TRAJ_ID, + n_partitions: int = 1, ): """ Checks whether past data has 'lat', 'lon', 'datetime' columns. @@ -78,7 +78,7 @@ def __init__( try: zip_list[i] = zip_list[i] except KeyError: - zip_list.append(i) + zip_list.append(str(i)) data = pd.DataFrame(data, columns=zip_list) mapping_columns = MoveDataFrame.format_labels( @@ -204,23 +204,23 @@ def shape(self): """Return a tuple representing the dimensionality of the DataFrame.""" raise NotImplementedError('To be implemented') - def rename(self): + def rename(self, *args, **kwargs): """Alter axes labels..""" raise NotImplementedError('To be implemented') - def len(self): + def len(self, *args, **kwargs): """Returns the length/row numbers in trajectory data.""" raise NotImplementedError('To be implemented') - def unique(self): + def unique(self, *args, **kwargs): """Return unique values of Series object.""" raise NotImplementedError('To be implemented') def head( self, - n: Optional[int] = 5, - npartitions: Optional[int] = 1, - compute: Optional[bool] = True + n: int = 5, + npartitions: int = 1, + compute: bool = True ) -> DataFrame: """ Return the first n rows. @@ -248,9 +248,9 @@ def head( def tail( self, - n: Optional[int] = 5, - npartitions: Optional[int] = 1, - compute: Optional[bool] = True + n: int = 5, + npartitions: int = 1, + compute: bool = True ) -> DataFrame: """ Return the last n rows. 
@@ -276,19 +276,19 @@ def tail( """ return self._data.tail(n, npartitions, compute) - def get_users_number(self): + def get_users_number(self, *args, **kwargs): """Check and return number of users in trajectory data.""" raise NotImplementedError('To be implemented') - def to_numpy(self): + def to_numpy(self, *args, **kwargs): """Converts trajectory data to numpy array format.""" raise NotImplementedError('To be implemented') - def to_dict(self): + def to_dict(self, *args, **kwargs): """Converts trajectory data to dict format.""" raise NotImplementedError('To be implemented') - def to_grid(self): + def to_grid(self, *args, **kwargs): """Converts trajectory data to grid format.""" raise NotImplementedError('To be implemented') @@ -304,205 +304,205 @@ def to_data_frame(self) -> DataFrame: """ return self._data - def info(self): + def info(self, *args, **kwargs): """Print a concise summary of a DataFrame.""" raise NotImplementedError('To be implemented') - def describe(self): + def describe(self, *args, **kwargs): """Generate descriptive statistics.""" raise NotImplementedError('To be implemented') - def memory_usage(self): + def memory_usage(self, *args, **kwargs): """Return the memory usage of each column in bytes.""" raise NotImplementedError('To be implemented') - def copy(self): + def copy(self, *args, **kwargs): """Make a copy of this object’srs indices and data.""" raise NotImplementedError('To be implemented') - def generate_tid_based_on_id_datetime(self): + def generate_tid_based_on_id_datetime(self, *args, **kwargs): """Create or update trajectory id based on id e datetime.""" raise NotImplementedError('To be implemented') - def generate_date_features(self): + def generate_date_features(self, *args, **kwargs): """Create or update date feature.""" raise NotImplementedError('To be implemented') - def generate_hour_features(self): + def generate_hour_features(self, *args, **kwargs): """Create or update hour feature.""" raise NotImplementedError('To be 
implemented') - def generate_day_of_the_week_features(self): + def generate_day_of_the_week_features(self, *args, **kwargs): """Create or update a feature day of the week from datatime.""" raise NotImplementedError('To be implemented') - def generate_weekend_features(self): + def generate_weekend_features(self, *args, **kwargs): """Create or update the feature weekend to the dataframe.""" raise NotImplementedError('To be implemented') - def generate_time_of_day_features(self): + def generate_time_of_day_features(self, *args, **kwargs): """Create a feature time of day or period from datatime.""" raise NotImplementedError('To be implemented') - def generate_datetime_in_format_cyclical(self): + def generate_datetime_in_format_cyclical(self, *args, **kwargs): """Create or update column with cyclical datetime feature.""" raise NotImplementedError('To be implemented') - def generate_dist_time_speed_features(self): + def generate_dist_time_speed_features(self, *args, **kwargs): """Creates features of distance, time and speed between points.""" raise NotImplementedError('To be implemented') - def generate_dist_features(self): + def generate_dist_features(self, *args, **kwargs): """Create the three distance in meters to an GPS point P.""" raise NotImplementedError('To be implemented') - def generate_time_features(self): + def generate_time_features(self, *args, **kwargs): """Create the three time in seconds to an GPS point P.""" raise NotImplementedError('To be implemented') - def generate_speed_features(self): + def generate_speed_features(self, *args, **kwargs): """Create the three speed in meters by seconds to an GPS point P.""" raise NotImplementedError('To be implemented') - def generate_move_and_stop_by_radius(self): + def generate_move_and_stop_by_radius(self, *args, **kwargs): """Create or update column with move and stop points by radius.""" raise NotImplementedError('To be implemented') - def time_interval(self): + def time_interval(self, *args, **kwargs): """Get 
time difference between max and min datetime in trajectory.""" raise NotImplementedError('To be implemented') - def get_bbox(self): + def get_bbox(self, *args, **kwargs): """Creates the bounding box of the trajectories.""" raise NotImplementedError('To be implemented') - def plot_all_features(self): + def plot_all_features(self, *args, **kwargs): """Generate a visualization for each column that type is equal dtype.""" raise NotImplementedError('To be implemented') - def plot_trajs(self): + def plot_trajs(self, *args, **kwargs): """Generate a visualization that show trajectories.""" raise NotImplementedError('To be implemented') - def plot_traj_id(self): + def plot_traj_id(self, *args, **kwargs): """Generate a visualization for a trajectory with the specified tid.""" raise NotImplementedError('To be implemented') - def show_trajectories_info(self): + def show_trajectories_info(self, *args, **kwargs): """Show dataset information from dataframe.""" raise NotImplementedError('To be implemented') - def min(self): + def min(self, *args, **kwargs): """Return the minimum of the values for the requested axis.""" raise NotImplementedError('To be implemented') - def max(self): + def max(self, *args, **kwargs): """Return the maximum of the values for the requested axis.""" raise NotImplementedError('To be implemented') - def count(self): + def count(self, *args, **kwargs): """Counts the non-NA cells for each column or row.""" raise NotImplementedError('To be implemented') - def groupby(self): + def groupby(self, *args, **kwargs): """Groups dask DataFrame using a mapper or by a Series of columns.""" raise NotImplementedError('To be implemented') - def plot(self): + def plot(self, *args, **kwargs): """Plot the data of the dask DataFrame.""" raise NotImplementedError('To be implemented') - def select_dtypes(self): + def select_dtypes(self, *args, **kwargs): """Returns a subset of the columns based on the column dtypes.""" raise NotImplementedError('To be implemented') - def 
astype(self): + def astype(self, *args, **kwargs): """Casts a dask object to a specified dtype.""" raise NotImplementedError('To be implemented') - def sort_values(self): + def sort_values(self, *args, **kwargs): """Sorts the values of the dask DataFrame.""" raise NotImplementedError('To be implemented') - def reset_index(self): + def reset_index(self, *args, **kwargs): """Resets the dask DataFrame'srs index, and use the default one.""" raise NotImplementedError('To be implemented') - def set_index(self): + def set_index(self, *args, **kwargs): """Set of row labels using one or more existing columns or arrays.""" raise NotImplementedError('To be implemented') - def drop(self): + def drop(self, *args, **kwargs): """Drops specified rows or columns of the dask Dataframe.""" raise NotImplementedError('To be implemented') - def duplicated(self): + def duplicated(self, *args, **kwargs): """Returns boolean Series denoting duplicate rows.""" raise NotImplementedError('To be implemented') - def drop_duplicates(self): + def drop_duplicates(self, *args, **kwargs): """Removes duplicated rows from the data.""" raise NotImplementedError('To be implemented') - def shift(self): + def shift(self, *args, **kwargs): """Shifts by desired number of periods with an optional time freq.""" raise NotImplementedError('To be implemented') - def all(self): + def all(self, *args, **kwargs): """Indicates if all elements are True, potentially over an axis.""" raise NotImplementedError('To be implemented') - def any(self): + def any(self, *args, **kwargs): """Indicates if any element is True, potentially over an axis.""" raise NotImplementedError('To be implemented') - def isna(self): + def isna(self, *args, **kwargs): """Detect missing values.""" raise NotImplementedError('To be implemented') - def fillna(self): + def fillna(self, *args, **kwargs): """Fills missing data in the dask DataFrame.""" raise NotImplementedError('To be implemented') - def dropna(self): + def dropna(self, *args, 
**kwargs): """Removes missing data from dask DataFrame.""" raise NotImplementedError('To be implemented') - def sample(self): + def sample(self, *args, **kwargs): """Samples data from the dask DataFrame.""" raise NotImplementedError('To be implemented') - def isin(self): + def isin(self, *args, **kwargs): """Determines whether each element is contained in values.""" raise NotImplementedError('To be implemented') - def append(self): + def append(self, *args, **kwargs): """Append rows of other to the end of caller, returning a new object.""" raise NotImplementedError('To be implemented') - def join(self): + def join(self, *args, **kwargs): """Join columns of another DataFrame.""" raise NotImplementedError('To be implemented') - def merge(self): + def merge(self, *args, **kwargs): """Merge columns of another DataFrame.""" raise NotImplementedError('To be implemented') - def nunique(self): + def nunique(self, *args, **kwargs): """Count distinct observations over requested axis.""" raise NotImplementedError('To be implemented') - def write_file(self): + def write_file(self, *args, **kwargs): """Write trajectory data to a new file.""" raise NotImplementedError('To be implemented') - def to_csv(self): + def to_csv(self, *args, **kwargs): """Write object to a comma-separated values (csv) file.""" raise NotImplementedError('To be implemented') def convert_to( self, new_type: Text - ) -> Union['PandasMoveDataFrame', 'DaskMoveDataFrame']: + ) -> Union[MoveDataFrame, 'PandasMoveDataFrame', 'DaskMoveDataFrame']: """ Convert an object from one type to another specified by the user. @@ -517,9 +517,7 @@ def convert_to( The converted object. 
""" - if new_type == TYPE_DASK: - return self - elif new_type == TYPE_PANDAS: + if new_type == TYPE_PANDAS: df_pandas = self._data.compute() return MoveDataFrame( df_pandas, @@ -529,6 +527,8 @@ def convert_to( traj_id=TRAJ_ID, type_=TYPE_PANDAS ) + else: + return self def get_type(self) -> Text: """ diff --git a/pymove/core/dataframe.py b/pymove/core/dataframe.py index e4052d22..3e586116 100644 --- a/pymove/core/dataframe.py +++ b/pymove/core/dataframe.py @@ -1,6 +1,6 @@ """MoveDataFrame class.""" -from typing import Dict, List, Optional, Text, Union +from typing import Dict, List, Text, Union from dateutil.parser._parser import ParserError from pandas.core.frame import DataFrame @@ -22,12 +22,12 @@ class MoveDataFrame: def __new__( self, data: Union[DataFrame, Dict, List], - latitude: Optional[Text] = LATITUDE, - longitude: Optional[Text] = LONGITUDE, - datetime: Optional[Text] = DATETIME, - traj_id: Optional[Text] = TRAJ_ID, - type_: Optional[Text] = TYPE_PANDAS, - n_partitions: Optional[int] = 1, + latitude: Text = LATITUDE, + longitude: Text = LONGITUDE, + datetime: Text = DATETIME, + traj_id: Text = TRAJ_ID, + type_: Text = TYPE_PANDAS, + n_partitions: int = 1, ): """ Creates the PyMove dataframe, which must contain latitude, longitude and datetime. 
@@ -48,7 +48,7 @@ def __new__( Represents column name trajectory id, by default TRAJ_ID type_ : str, optional Number of partitions of the dask dataframe, by default TYPE_PANDAS - n_partitions : Optional[int], optional + n_partitions : int, optional Amount of partitions for dask dataframe, by default 1 Raises diff --git a/pymove/core/grid.py b/pymove/core/grid.py index 276f239a..a29c65a7 100644 --- a/pymove/core/grid.py +++ b/pymove/core/grid.py @@ -47,19 +47,26 @@ def __init__( 'grid_size_lat_y': lat y size of grid, 'grid_size_lon_x': lon x size of grid, 'cell_size_by_degree': cell size in radians, - cell_size : float, optional + cell_size : float Represents grid cell size, by default None meters_by_degree : float, optional Represents the corresponding meters of lat by degree, by default lat_meters(-3.71839) + + Raises + ------ + ValueError + If one of data or cell size is not provided """ - self.last_operation = None + self.last_operation: Dict = dict() if meters_by_degree is None: meters_by_degree = lat_meters(-3.71839) if isinstance(data, dict): self._grid_from_dict(data) - else: + elif cell_size is not None: self._create_virtual_grid(data, cell_size, meters_by_degree) + else: + raise ValueError('Must pass either data or cell size.') self.grid_polygon = None def get_grid(self) -> Dict: @@ -170,9 +177,9 @@ def _create_virtual_grid( def create_update_index_grid_feature( self, data: DataFrame, - unique_index: Optional[bool] = True, - label_dtype: Optional[Callable] = np.int64, - sort: Optional[bool] = True + unique_index: bool = True, + label_dtype: Callable = np.int64, + sort: bool = True ): """ Create or update index grid feature. @@ -185,7 +192,7 @@ create_update_index_grid_feature( Represents the dataset with contains lat, long and datetime. 
unique_index: bool, optional How to index the grid, by default True - label_dtype : Optional[Callable], optional + label_dtype : Callable, optional Represents the type of a value of new column in dataframe, by default np.int64 sort : bool, optional Represents if needs to sort the dataframe, by default True @@ -211,8 +218,8 @@ def create_update_index_grid_feature( def convert_two_index_grid_to_one( self, data: DataFrame, - label_grid_lat: Optional[Text] = INDEX_GRID_LAT, - label_grid_lon: Optional[Text] = INDEX_GRID_LON, + label_grid_lat: Text = INDEX_GRID_LAT, + label_grid_lon: Text = INDEX_GRID_LON, ): """ Converts grid lat-lon ids to unique values. @@ -234,7 +241,7 @@ def convert_two_index_grid_to_one( def convert_one_index_grid_to_two( self, data: DataFrame, - label_grid_index: Optional[Text] = INDEX_GRID, + label_grid_index: Text = INDEX_GRID, ): """ Converts grid lat-lon ids to unique values. @@ -427,12 +434,12 @@ def read_grid_pkl(self, filename: Text) -> 'Grid': def show_grid_polygons( self, data: DataFrame, - markersize: Optional[float] = 10, - linewidth: Optional[float] = 2, - figsize: Optional[Tuple[int, int]] = (10, 10), - return_fig: Optional[bool] = True, - save_fig: Optional[bool] = False, - name: Optional[Text] = 'grid.png', + markersize: float = 10, + linewidth: float = 2, + figsize: Tuple[int, int] = (10, 10), + return_fig: bool = True, + save_fig: bool = False, + name: Text = 'grid.png', ) -> Optional[figure]: """ Generate a visualization with grid polygons. 
@@ -457,7 +464,7 @@ def show_grid_polygons( Returns ------- - Optional[figure] + figure The generated picture or None Raises diff --git a/pymove/core/interface.py b/pymove/core/interface.py index 3e03ad89..f8408f6e 100644 --- a/pymove/core/interface.py +++ b/pymove/core/interface.py @@ -279,7 +279,7 @@ def write_file(self): pass @abc.abstractmethod - def convert_to(self): + def convert_to(self, new_type: str): pass @abc.abstractmethod diff --git a/pymove/core/pandas.py b/pymove/core/pandas.py index a1693d66..1e48a129 100644 --- a/pymove/core/pandas.py +++ b/pymove/core/pandas.py @@ -55,10 +55,10 @@ class PandasMoveDataFrame(DataFrame): def __init__( self, data: Union[DataFrame, List, Dict], - latitude: Optional[Text] = LATITUDE, - longitude: Optional[Text] = LONGITUDE, - datetime: Optional[Text] = DATETIME, - traj_id: Optional[Text] = TRAJ_ID, + latitude: Text = LATITUDE, + longitude: Text = LONGITUDE, + datetime: Text = DATETIME, + traj_id: Text = TRAJ_ID, ): """ Checks whether past data has 'lat', 'lon', 'datetime' columns. 
@@ -103,7 +103,7 @@ def __init__( try: zip_list[i] = zip_list[i] except KeyError: - zip_list.append(i) + zip_list.append(str(i)) data = DataFrame(data, columns=zip_list) columns = MoveDataFrame.format_labels( @@ -115,7 +115,7 @@ def __init__( MoveDataFrame.validate_move_data_frame(tdf) super(PandasMoveDataFrame, self).__init__(tdf) self._type = TYPE_PANDAS - self.last_operation = None + self.last_operation: Dict = dict() else: raise KeyError( @@ -189,13 +189,13 @@ def datetime(self): return self[DATETIME] def rename( - self, - mapper: Optional[Union[Dict, Callable]] = None, - index: Optional[Union[Dict, Callable]] = None, - columns: Optional[Union[Dict, Callable]] = None, - axis: Optional[Union[int, Text]] = None, - copy: Optional[bool] = True, - inplace: Optional[bool] = False + self, + mapper: Optional[Union[Dict, Callable]] = None, + index: Optional[Union[Dict, Callable]] = None, + columns: Optional[Union[Dict, Callable]] = None, + axis: Optional[Union[int, Text]] = None, + copy: bool = True, + inplace: bool = False ) -> Optional[Union['PandasMoveDataFrame', DataFrame]]: """ Alter axes labels. @@ -245,7 +245,7 @@ def rename( if inplace: if MoveDataFrame.has_columns(rename_): self._mgr = rename_._mgr - self._item_cache = dict() + self._item_cache: Dict = dict() rename_ = None else: raise AttributeError( @@ -277,7 +277,7 @@ def __getitem__(self, key): return PandasMoveDataFrame(item) return item - def head(self, n: Optional[int] = 5) -> 'PandasMoveDataFrame': + def head(self, n: int = 5) -> 'PandasMoveDataFrame': """ Return the first n rows. @@ -303,7 +303,7 @@ def head(self, n: Optional[int] = 5) -> 'PandasMoveDataFrame': head_ = super().head(n=n) return PandasMoveDataFrame(data=head_) - def tail(self, n: Optional[int] = 5) -> 'PandasMoveDataFrame': + def tail(self, n: int = 5) -> 'PandasMoveDataFrame': """ Return the last n rows. 
@@ -394,7 +394,7 @@ def to_data_frame(self) -> DataFrame: return DataFrame(self) def to_dicrete_move_df( - self, local_label: Optional[Text] = LOCAL_LABEL + self, local_label: Text = LOCAL_LABEL ) -> 'PandasMoveDataFrame': """ Generate a discrete dataframe move. @@ -423,7 +423,7 @@ def to_dicrete_move_df( self, LATITUDE, LONGITUDE, DATETIME, TRAJ_ID, local_label ) - def copy(self, deep: Optional[bool] = True) -> 'PandasMoveDataFrame': + def copy(self, deep: bool = True) -> 'PandasMoveDataFrame': """ Make a copy of this object’s indices and data. @@ -464,9 +464,9 @@ def copy(self, deep: Optional[bool] = True) -> 'PandasMoveDataFrame': def generate_tid_based_on_id_datetime( self, - str_format: Optional[Text] = '%Y%m%d%H', - sort: Optional[bool] = True, - inplace: Optional[bool] = True + str_format: Text = '%Y%m%d%H', + sort: bool = True, + inplace: bool = True ) -> Optional['PandasMoveDataFrame']: """ Create or update trajectory id based on id and datetime. @@ -512,7 +512,7 @@ def generate_tid_based_on_id_datetime( return data def generate_date_features( - self, inplace: Optional[bool] = True + self, inplace: bool = True ) -> Optional['PandasMoveDataFrame']: """ Create or update date feature based on datetime. @@ -545,7 +545,7 @@ def generate_date_features( return data def generate_hour_features( - self, inplace: Optional[bool] = True + self, inplace: bool = True ) -> Optional['PandasMoveDataFrame']: """ Create or update hour features based on datetime. @@ -578,7 +578,7 @@ def generate_hour_features( return data def generate_day_of_the_week_features( - self, inplace: Optional[bool] = True + self, inplace: bool = True ) -> Optional['PandasMoveDataFrame']: """ Create or update day of the week features based on datetime. 
@@ -610,7 +610,9 @@ def generate_day_of_the_week_features( return data def generate_weekend_features( - self, create_day_of_week: Optional[bool] = False, inplace: Optional[bool] = True + self, + create_day_of_week: bool = False, + inplace: bool = True ) -> Optional['PandasMoveDataFrame']: """ Adds information to rows determining if it is a weekend day. @@ -658,7 +660,7 @@ def generate_weekend_features( return data def generate_time_of_day_features( - self, inplace: Optional[bool] = True + self, inplace: bool = True ) -> Optional['PandasMoveDataFrame']: """ Create or update time of day features based on datetime. @@ -716,7 +718,9 @@ def generate_time_of_day_features( return data def generate_datetime_in_format_cyclical( - self, label_datetime: Optional[Text] = DATETIME, inplace: Optional[bool] = True + self, + label_datetime: Text = DATETIME, + inplace: bool = True ) -> Optional['PandasMoveDataFrame']: """ Create or update column with cyclical datetime feature. @@ -760,7 +764,7 @@ def generate_datetime_in_format_cyclical( @staticmethod def _prepare_generate_data( data_: DataFrame, sort: bool, label_id: Text - ) -> Tuple[List, int, int, int]: + ) -> Tuple[Any, int, None]: """ Processes the data and create variables for generate methods. @@ -803,10 +807,10 @@ def _prepare_generate_data( def generate_dist_time_speed_features( self, - label_id: Optional[Text] = TRAJ_ID, - label_dtype: Optional[Callable] = np.float64, - sort: Optional[bool] = True, - inplace: Optional[bool] = True + label_id: Text = TRAJ_ID, + label_dtype: Callable = np.float64, + sort: bool = True, + inplace: bool = True ) -> Optional['PandasMoveDataFrame']: """ Adds distance, time and speed information to the dataframe. 
@@ -899,10 +903,10 @@ def generate_dist_time_speed_features( def generate_dist_features( self, - label_id: Optional[Text] = TRAJ_ID, - label_dtype: Optional[Callable] = np.float64, - sort: Optional[bool] = True, - inplace: Optional[bool] = True + label_id: Text = TRAJ_ID, + label_dtype: Callable = np.float64, + sort: bool = True, + inplace: bool = True ) -> Optional['PandasMoveDataFrame']: """ Create the three distance in meters to an GPS point P. @@ -987,10 +991,10 @@ def generate_dist_features( def generate_time_features( self, - label_id: Optional[Text] = TRAJ_ID, - label_dtype: Optional[Callable] = np.float64, - sort: Optional[bool] = True, - inplace: Optional[bool] = True + label_id: Text = TRAJ_ID, + label_dtype: Callable = np.float64, + sort: bool = True, + inplace: bool = True ) -> Optional['PandasMoveDataFrame']: """ Create the three time in seconds to an GPS point P. @@ -1066,10 +1070,10 @@ def generate_time_features( def generate_speed_features( self, - label_id: Optional[Text] = TRAJ_ID, - label_dtype: Optional[Callable] = np.float64, - sort: Optional[bool] = True, - inplace: Optional[bool] = True + label_id: Text = TRAJ_ID, + label_dtype: Callable = np.float64, + sort: bool = True, + inplace: bool = True ) -> Optional['PandasMoveDataFrame']: """ Create the three speed in meter by seconds to an GPS point P. 
@@ -1091,6 +1095,11 @@ def generate_speed_features( PandasMoveDataFrame Object with new features or None + Raises + ------ + ValueError + If feature generation fails + Examples -------- - P to P.next = 1 meter/seconds @@ -1108,15 +1117,15 @@ def generate_speed_features( '\nCreating or updating speed features meters by seconds\n' ) - dist_cols = [DIST_TO_PREV, DIST_TO_NEXT, DIST_PREV_TO_NEXT] - time_cols = [TIME_TO_PREV, TIME_TO_NEXT, TIME_PREV_TO_NEXT] - dists = data.generate_dist_features( label_id, label_dtype, sort, inplace=False - )[dist_cols] + ) times = data.generate_time_features( label_id, label_dtype, sort, inplace=False - )[time_cols] + ) + + if dists is None or times is None: + raise ValueError('Generating dist or time feature failed') data[SPEED_TO_PREV] = dists[DIST_TO_PREV] / times[TIME_TO_PREV] data[SPEED_TO_NEXT] = dists[DIST_TO_NEXT] / times[TIME_TO_NEXT] @@ -1129,14 +1138,15 @@ def generate_speed_features( ) data.reset_index(inplace=True) data.last_operation = end_operation(operation) + if not inplace: return data def generate_move_and_stop_by_radius( self, - radius: Optional[int] = 0, - target_label: Optional[Text] = DIST_TO_PREV, - inplace: Optional[bool] = True + radius: int = 0, + target_label: Text = DIST_TO_PREV, + inplace: bool = True ): """ Create or update column with move and stop points by radius. @@ -1317,8 +1327,8 @@ def show_trajectories_info(self): def astype( self, dtype: Union[Callable, Dict], - copy: Optional[bool] = True, - errors: Optional[Text] = 'raise' + copy: bool = True, + errors: Text = 'raise' ) -> DataFrame: """ Cast a pandas object to a specified dtype. 
@@ -1373,11 +1383,11 @@ def astype( def sort_values( self, by: Union[Text, List[Text]], - axis: Optional[int] = 0, - ascending: Optional[bool] = True, - inplace: Optional[bool] = False, - kind: Optional[Text] = 'quicksort', - na_position: Optional[Text] = 'last', + axis: int = 0, + ascending: bool = True, + inplace: bool = False, + kind: Text = 'quicksort', + na_position: Text = 'last', ) -> Optional['PandasMoveDataFrame']: """ Sorts the values of the _data, along an axis. @@ -1432,10 +1442,10 @@ def sort_values( def reset_index( self, level: Optional[Union[int, Text, Tuple, List]] = None, - drop: Optional[bool] = False, - inplace: Optional[bool] = False, - col_level: Optional[Union[int, Text]] = 0, - col_fill: Optional[Text] = '' + drop: bool = False, + inplace: bool = False, + col_level: Union[int, Text] = 0, + col_fill: Text = '' ) -> Optional['PandasMoveDataFrame']: """ Resets the DataFrame's index, and use the default one. @@ -1480,10 +1490,10 @@ def reset_index( def set_index( self, keys: Union[Text, List[Text]], - drop: Optional[bool] = True, - append: Optional[bool] = False, - inplace: Optional[bool] = False, - verify_integrity: Optional[bool] = False, + drop: bool = True, + append: bool = False, + inplace: bool = False, + verify_integrity: bool = False, ) -> Optional[Union['PandasMoveDataFrame', DataFrame]]: """ Set the DataFrame index (row labels) using one or more existing columns or arrays. @@ -1549,12 +1559,12 @@ def set_index( def drop( self, labels: Optional[Union[Text, List[Text]]] = None, - axis: Optional[Union[int, Text]] = 0, + axis: Union[int, Text] = 0, index: Optional[Union[Text, List[Text]]] = None, columns: Optional[Union[Text, List[Text]]] = None, level: Optional[Union[int, Text]] = None, - inplace: Optional[bool] = False, - errors: Optional[Text] = 'raise', + inplace: bool = False, + errors: Text = 'raise', ) -> Optional[Union['PandasMoveDataFrame', DataFrame]]: """ Removes rows or columns. 
@@ -1642,8 +1652,8 @@ def drop( def drop_duplicates( self, subset: Optional[Union[int, Text]] = None, - keep: Optional[Union[Text, bool]] = 'first', - inplace: Optional[bool] = False + keep: Union[Text, bool] = 'first', + inplace: bool = False ) -> Optional['PandasMoveDataFrame']: """ Uses the pandas's function drop_duplicates, to remove duplicated rows from data. @@ -1683,9 +1693,9 @@ def drop_duplicates( def shift( self, - periods: Optional[int] = 1, + periods: int = 1, freq: Optional[Union[DateOffset, Timedelta, Text]] = None, - axis: Optional[Union[int, Text]] = 0, + axis: Union[int, Text] = 0, fill_value: Optional[Any] = None ) -> 'PandasMoveDataFrame': """ @@ -1733,7 +1743,7 @@ def fillna( value: Optional[Any] = None, method: Optional[Text] = None, axis: Optional[Union[int, Text]] = None, - inplace: Optional[bool] = False, + inplace: bool = False, limit: Optional[int] = None, downcast: Optional[Dict] = None, ): @@ -1793,11 +1803,11 @@ def fillna( def dropna( self, - axis: Optional[Union[int, Text]] = 0, - how: Optional[Text] = 'any', + axis: Union[int, Text] = 0, + how: Text = 'any', thresh: Optional[float] = None, subset: Optional[List] = None, - inplace: Optional[bool] = False + inplace: bool = False ): """ Removes missing data. @@ -1862,7 +1872,7 @@ def sample( self, n: Optional[int] = None, frac: Optional[float] = None, - replace: Optional[bool] = False, + replace: bool = False, weights: Optional[Union[Text, List]] = None, random_state: Optional[int] = None, axis: Optional[Union[int, Text]] = None @@ -1956,9 +1966,9 @@ def isin(self, values: Union[List, Series, DataFrame, Dict]) -> DataFrame: def append( self, other: Union['PandasMoveDataFrame', DataFrame], - ignore_index: Optional[bool] = False, - verify_integrity: Optional[bool] = False, - sort: Optional[bool] = False + ignore_index: bool = False, + verify_integrity: bool = False, + sort: bool = False ) -> 'PandasMoveDataFrame': """ Append rows of other to the end of caller, returning a new object. 
@@ -2002,10 +2012,10 @@ def join( self, other: Union['PandasMoveDataFrame', DataFrame], on: Optional[Union[Text, List]] = None, - how: Optional[Text] = 'left', - lsuffix: Optional[Text] = '', - rsuffix: Optional[Text] = '', - sort: Optional[bool] = False + how: Text = 'left', + lsuffix: Text = '', + rsuffix: Text = '', + sort: bool = False ) -> 'PandasMoveDataFrame': """ Join columns of other, returning a new object. @@ -2071,16 +2081,16 @@ def join( def merge( self, right: Union['PandasMoveDataFrame', DataFrame, Series], - how: Optional[Text] = 'inner', + how: Text = 'inner', on: Optional[Union[Text, List]] = None, left_on: Optional[Union[Text, List]] = None, right_on: Optional[Union[Text, List]] = None, - left_index: Optional[bool] = False, - right_index: Optional[bool] = False, - sort: Optional[bool] = False, - suffixes: Optional[Tuple[Text, Text]] = ('_x', '_y'), - copy: Optional[bool] = True, - indicator: Optional[Union[bool, Text]] = False, + left_index: bool = False, + right_index: bool = False, + sort: bool = False, + suffixes: Tuple[Text, Text] = ('_x', '_y'), + copy: bool = True, + indicator: Union[bool, Text] = False, validate: Optional[Text] = None ) -> 'PandasMoveDataFrame': """ @@ -2170,7 +2180,7 @@ def merge( ) return PandasMoveDataFrame(data=_merge) - def write_file(self, file_name: Text, separator: Optional[Text] = ','): + def write_file(self, file_name: Text, separator: Text = ','): """ Write trajectory data to a new file. @@ -2188,7 +2198,7 @@ def write_file(self, file_name: Text, separator: Optional[Text] = ','): def convert_to( self, new_type: Text - ) -> Union['PandasMoveDataFrame', 'DaskMoveDataFrame']: + ) -> Union[MoveDataFrame, 'PandasMoveDataFrame', 'DaskMoveDataFrame']: """ Convert an object from one type to another specified by the user. 
@@ -2217,7 +2227,7 @@ def convert_to( ) self.last_operation = end_operation(operation) return _dask - elif new_type == TYPE_PANDAS: + else: self.last_operation = end_operation(operation) return self diff --git a/pymove/core/pandas_discrete.py b/pymove/core/pandas_discrete.py index 7f38be20..54e9c93e 100644 --- a/pymove/core/pandas_discrete.py +++ b/pymove/core/pandas_discrete.py @@ -38,11 +38,11 @@ class PandasDiscreteMoveDataFrame(PandasMoveDataFrame): def __init__( self, data: Union[DataFrame, List, Dict], - latitude: Optional[Text] = LATITUDE, - longitude: Optional[Text] = LONGITUDE, - datetime: Optional[Text] = DATETIME, - traj_id: Optional[Text] = TRAJ_ID, - local_label: Optional[Text] = LOCAL_LABEL + latitude: Text = LATITUDE, + longitude: Text = LONGITUDE, + datetime: Text = DATETIME, + traj_id: Text = TRAJ_ID, + local_label: Text = LOCAL_LABEL ): """ Creates a dataframe using local_label as a discrete feature for localization. @@ -82,7 +82,7 @@ def __init__( '{} column not in dataframe'.format(local_label) ) - def discretize_based_grid(self, region_size: Optional[int] = 1000): + def discretize_based_grid(self, region_size: int = 1000): """ Discrete space in cells of the same size, assigning a unique id to each cell. @@ -100,10 +100,10 @@ def discretize_based_grid(self, region_size: Optional[int] = 1000): def generate_prev_local_features( self, - label_id: Optional[Text] = TRAJ_ID, - local_label: Optional[Text] = LOCAL_LABEL, - sort: Optional[bool] = True, - inplace: Optional[bool] = True + label_id: Text = TRAJ_ID, + local_label: Text = LOCAL_LABEL, + sort: bool = True, + inplace: bool = True ) -> Optional['PandasDiscreteMoveDataFrame']: """ Create a feature prev_local with the label of previous local to current point. 
@@ -162,19 +162,20 @@ def generate_prev_local_features( data_.reset_index(inplace=True) data_.last_operation = end_operation(operation) + if not inplace: return data_ def generate_tid_based_statistics( self, - label_id: Optional[Text] = TRAJ_ID, - local_label: Optional[Text] = LOCAL_LABEL, - mean_coef: Optional[float] = 1.0, - std_coef: Optional[float] = 1.0, + label_id: Text = TRAJ_ID, + local_label: Text = LOCAL_LABEL, + mean_coef: float = 1.0, + std_coef: float = 1.0, statistics: Optional[DataFrame] = None, - label_tid_stat: Optional[Text] = TID_STAT, - drop_single_points: Optional[bool] = False, - inplace: Optional[bool] = True, + label_tid_stat: Text = TID_STAT, + drop_single_points: bool = False, + inplace: bool = True, ) -> Optional['PandasDiscreteMoveDataFrame']: """ Splits the trajectories into segments based on time statistics for segments. @@ -190,7 +191,7 @@ def generate_tid_based_statistics( Multiplication coefficient of the mean time for the segment, by default 1.0 std_coef : float, optional Multiplication coefficient of sdt time for the segment, by default 1.0 - statistics : Optional[DataFrame], optional + statistics : DataFrame, optional Time Statistics of the pairwise local labels, by default None label_tid_stat : str, optional The label of the column containing the ids of the formed segments. 
@@ -260,9 +261,10 @@ def generate_tid_based_statistics( filter_.append(row[TIME_TO_PREV] > threshold) - filter_ = np.array(filter_) + filter_arr = np.array(filter_) current_tid, count = _update_curr_tid_count( - filter_, data_, idx, label_tid_stat, current_tid, count) + filter_arr, data_, idx, label_tid_stat, current_tid, count + ) if label_id == TID_STAT: self.reset_index(drop=True, inplace=True) @@ -275,5 +277,6 @@ def generate_tid_based_statistics( if drop_single_points: _drop_single_point(data_, TID_STAT, label_id) self.generate_dist_time_speed_features() + if not inplace: return data_ diff --git a/pymove/models/pattern_mining/clustering.py b/pymove/models/pattern_mining/clustering.py index 7a519cd0..d9302b3b 100644 --- a/pymove/models/pattern_mining/clustering.py +++ b/pymove/models/pattern_mining/clustering.py @@ -21,9 +21,9 @@ @timer_decorator def elbow_method( move_data: DataFrame, - k_initial: Optional[int] = 1, - max_clusters: Optional[int] = 15, - k_iteration: Optional[int] = 1, + k_initial: int = 1, + max_clusters: int = 15, + k_iteration: int = 1, random_state: Optional[int] = None ) -> Dict: """ @@ -80,10 +80,10 @@ def elbow_method( @timer_decorator def gap_statistic( move_data: DataFrame, - nrefs: Optional[int] = 3, - k_initial: Optional[int] = 1, - max_clusters: Optional[int] = 15, - k_iteration: Optional[int] = 1, + nrefs: int = 3, + k_initial: int = 1, + max_clusters: int = 15, + k_iteration: int = 1, random_state: Optional[int] = None ) -> Dict: """ @@ -152,11 +152,11 @@ def gap_statistic( def dbscan_clustering( move_data: DataFrame, cluster_by: Text, - meters: Optional[int] = 10, - min_sample: Optional[float] = 1680 / 2, - earth_radius: Optional[float] = EARTH_RADIUS, - metric: Optional[Union[Text, Callable]] = 'euclidean', - inplace: Optional[bool] = False + meters: int = 10, + min_sample: float = 1680 / 2, + earth_radius: float = EARTH_RADIUS, + metric: Union[Text, Callable] = 'euclidean', + inplace: bool = False ) -> Optional[DataFrame]: """ 
Performs density based clustering on the move_dataframe according to cluster_by. diff --git a/pymove/preprocessing/compression.py b/pymove/preprocessing/compression.py index c70e96b6..3be11a53 100644 --- a/pymove/preprocessing/compression.py +++ b/pymove/preprocessing/compression.py @@ -5,7 +5,7 @@ """ -from typing import Optional, Text +from typing import Text import numpy as np from pandas import DataFrame @@ -28,14 +28,14 @@ @timer_decorator def compress_segment_stop_to_point( move_data: DataFrame, - label_segment: Optional[Text] = SEGMENT_STOP, - label_stop: Optional[Text] = STOP, - point_mean: Optional[Text] = 'default', - drop_moves: Optional[bool] = False, - label_id: Optional[Text] = TRAJ_ID, - dist_radius: Optional[float] = 30, - time_radius: Optional[float] = 900, - inplace: Optional[bool] = False, + label_segment: Text = SEGMENT_STOP, + label_stop: Text = STOP, + point_mean: Text = 'default', + drop_moves: bool = False, + label_id: Text = TRAJ_ID, + dist_radius: float = 30, + time_radius: float = 900, + inplace: bool = False, ) -> DataFrame: """ Compress the trajectories using the stop points in the dataframe. diff --git a/pymove/preprocessing/filters.py b/pymove/preprocessing/filters.py index 885ba011..0f2c9d28 100644 --- a/pymove/preprocessing/filters.py +++ b/pymove/preprocessing/filters.py @@ -18,7 +18,7 @@ """ -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Text, Tuple, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Text, Tuple, Union import numpy as np from pandas import DataFrame @@ -43,8 +43,8 @@ def get_bbox_by_radius( - coordinates: Tuple[float, float], radius: Optional[float] = 1000 -) -> List: + coordinates: Tuple[float, float], radius: float = 1000 +) -> Tuple[float, float, float, float]: """ Defines minimum and maximum coordinates, given a distance radius from a point. 
@@ -77,14 +77,14 @@ def get_bbox_by_radius( lonmin = lon - delta_lon lonmax = lon + delta_lon - return np.rad2deg([latmin, lonmin, latmax, lonmax]) + return tuple(np.rad2deg([latmin, lonmin, latmax, lonmax])) def by_bbox( move_data: DataFrame, - bbox: Tuple[int, int, int, int], - filter_out: Optional[bool] = False, - inplace: Optional[bool] = False + bbox: Tuple[float, float, float, float], + filter_out: bool = False, + inplace: bool = False ) -> Optional[DataFrame]: """ Filters points of the trajectories according to specified bounding box. @@ -125,8 +125,8 @@ def by_datetime( move_data: DataFrame, start_datetime: Optional[Text] = None, end_datetime: Optional[Text] = None, - filter_out: Optional[bool] = False, - inplace: Optional[bool] = False, + filter_out: bool = False, + inplace: bool = False, ) -> Optional[DataFrame]: """ Filters trajectories points according to specified time range. @@ -174,8 +174,8 @@ def by_label( move_data: DataFrame, value: Any, label_name: Text, - filter_out: Optional[bool] = False, - inplace: Optional[bool] = False + filter_out: bool = False, + inplace: bool = False ) -> Optional[DataFrame]: """ Filters trajectories points according to specified value and column label. @@ -213,9 +213,9 @@ def by_label( def by_id( move_data: DataFrame, id_: Optional[int] = None, - label_id: Optional[Text] = TRAJ_ID, - filter_out: Optional[bool] = False, - inplace: Optional[bool] = False + label_id: Text = TRAJ_ID, + filter_out: bool = False, + inplace: bool = False ) -> Optional[DataFrame]: """ Filters trajectories points according to specified trajectory id. @@ -251,8 +251,8 @@ def by_id( def by_tid( move_data: DataFrame, tid_: Optional[Text] = None, - filter_out: Optional[bool] = False, - inplace: Optional[bool] = False + filter_out: bool = False, + inplace: bool = False ) -> Optional[DataFrame]: """ Filters trajectories points according to a specified trajectory tid. 
@@ -287,8 +287,8 @@ def by_tid( def clean_consecutive_duplicates( move_data: DataFrame, subset: Optional[Union[int, Text]] = None, - keep: Optional[Union[Text, bool]] = 'first', - inplace: Optional[bool] = False + keep: Union[Text, bool] = 'first', + inplace: bool = False ) -> Optional[DataFrame]: """ Removes consecutive duplicate rows of the Dataframe. @@ -467,11 +467,11 @@ def _clean_gps(move_data: DataFrame, f: callable, **kwargs): def clean_gps_jumps_by_distance( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Optional[Text] = TRAJ_ID, - jump_coefficient: Optional[float] = 3.0, - threshold: Optional[float] = 1, - label_dtype: Optional[callable] = np.float64, - inplace: Optional[bool] = False, + label_id: Text = TRAJ_ID, + jump_coefficient: float = 3.0, + threshold: float = 1, + label_dtype: callable = np.float64, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Removes the trajectories points that are outliers from the dataframe. @@ -525,10 +525,10 @@ def clean_gps_jumps_by_distance( def clean_gps_nearby_points_by_distances( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Optional[Text] = TRAJ_ID, - radius_area: Optional[float] = 10.0, - label_dtype: Optional[callable] = np.float64, - inplace: Optional[bool] = False, + label_id: Text = TRAJ_ID, + radius_area: float = 10.0, + label_dtype: callable = np.float64, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Removes points from the trajectories with smaller distance from the point before. 
@@ -580,10 +580,10 @@ def clean_gps_nearby_points_by_distances( def clean_gps_nearby_points_by_speed( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Optional[Text] = TRAJ_ID, - speed_radius: Optional[float] = 0.0, - label_dtype: Optional[Callable] = np.float64, - inplace: Optional[bool] = False, + label_id: Text = TRAJ_ID, + speed_radius: float = 0.0, + label_dtype: Callable = np.float64, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Removes points from the trajectories with smaller speed of travel. @@ -635,10 +635,10 @@ def clean_gps_nearby_points_by_speed( def clean_gps_speed_max_radius( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Optional[Text] = TRAJ_ID, - speed_max: Optional[float] = 50.0, - label_dtype: Optional[Callable] = np.float64, - inplace: Optional[bool] = False, + label_id: Text = TRAJ_ID, + speed_max: float = 50.0, + label_dtype: Callable = np.float64, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Removes trajectories points with higher speed. @@ -699,9 +699,9 @@ def clean_gps_speed_max_radius( def clean_trajectories_with_few_points( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_tid: Optional[Text] = TID, - min_points_per_trajectory: Optional[int] = 2, - inplace: Optional[bool] = False + label_tid: Text = TID, + min_points_per_trajectory: int = 2, + inplace: bool = False ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Removes from the given dataframe, trajectories with fewer points. 
@@ -776,11 +776,11 @@ def clean_trajectories_with_few_points( def clean_trajectories_short_and_few_points( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Optional[Text] = TID, - min_trajectory_distance: Optional[float] = 100, - min_points_per_trajectory: Optional[int] = 2, - label_dtype: Optional[Callable] = np.float64, - inplace: Optional[bool] = False, + label_id: Text = TID, + min_trajectory_distance: float = 100, + min_points_per_trajectory: int = 2, + label_dtype: Callable = np.float64, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Eliminates from the given dataframe trajectories with fewer points and shorter length. @@ -864,10 +864,10 @@ def clean_trajectories_short_and_few_points( def clean_id_by_time_max( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Optional[Text] = TRAJ_ID, - time_max: Optional[float] = 3600, - label_dtype: Optional[Callable] = np.float64, - inplace: Optional[bool] = False, + label_id: Text = TRAJ_ID, + time_max: float = 3600, + label_dtype: Callable = np.float64, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Clears GPS points with time by ID greater than a user-defined limit. diff --git a/pymove/preprocessing/segmentation.py b/pymove/preprocessing/segmentation.py index f6030afb..59c94f01 100644 --- a/pymove/preprocessing/segmentation.py +++ b/pymove/preprocessing/segmentation.py @@ -209,7 +209,7 @@ def _prepare_segmentation(move_data: DataFrame, label_id: Text, label_new_tid: T def _update_curr_tid_count( filter_: ndarray, move_data: DataFrame, idx: int, - label_new_tid: Text, curr_tid: float, count: int + label_new_tid: Text, curr_tid: int, count: int ) -> Tuple[int, int]: """ Updates the tid. 
@@ -334,13 +334,13 @@ def _filter_by( @timer_decorator def by_dist_time_speed( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Optional[Text] = TRAJ_ID, - max_dist_between_adj_points: Optional[float] = 3000, - max_time_between_adj_points: Optional[float] = 900, - max_speed_between_adj_points: Optional[float] = 50.0, - drop_single_points: Optional[bool] = True, - label_new_tid: Optional[Text] = TID_PART, - inplace: Optional[bool] = False, + label_id: Text = TRAJ_ID, + max_dist_between_adj_points: float = 3000, + max_time_between_adj_points: float = 900, + max_speed_between_adj_points: float = 50.0, + drop_single_points: bool = True, + label_new_tid: Text = TID_PART, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Splits the trajectories into segments based on distance, time and speed. @@ -414,11 +414,11 @@ def by_dist_time_speed( @timer_decorator def by_max_dist( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Optional[Text] = TRAJ_ID, - max_dist_between_adj_points: Optional[float] = 3000, - drop_single_points: Optional[bool] = True, - label_new_tid: Optional[Text] = TID_DIST, - inplace: Optional[bool] = False, + label_id: Text = TRAJ_ID, + max_dist_between_adj_points: float = 3000, + drop_single_points: bool = True, + label_new_tid: Text = TID_DIST, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Segments the trajectories based on distance. 
@@ -481,11 +481,11 @@ def by_max_dist( @timer_decorator def by_max_time( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Optional[Text] = TRAJ_ID, - max_time_between_adj_points: Optional[float] = 900.0, - drop_single_points: Optional[bool] = True, - label_new_tid: Optional[Text] = TID_TIME, - inplace: Optional[bool] = False, + label_id: Text = TRAJ_ID, + max_time_between_adj_points: float = 900.0, + drop_single_points: bool = True, + label_new_tid: Text = TID_TIME, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Splits the trajectories into segments based on a maximum. @@ -549,12 +549,12 @@ def by_max_time( @timer_decorator def by_max_speed( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Optional[Text] = TRAJ_ID, - max_speed_between_adj_points: Optional[float] = 50.0, - drop_single_points: Optional[bool] = True, - label_new_tid: Optional[Text] = TID_SPEED, - inplace: Optional[bool] = False, -) -> Union['PandasMoveDataFrame', 'DaskMoveDataFrame']: + label_id: Text = TRAJ_ID, + max_speed_between_adj_points: float = 50.0, + drop_single_points: bool = True, + label_new_tid: Text = TID_SPEED, + inplace: bool = False, +) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Splits the trajectories into segments based on a maximum speed. 
diff --git a/pymove/preprocessing/stay_point_detection.py b/pymove/preprocessing/stay_point_detection.py index df92f40e..ba819f5f 100644 --- a/pymove/preprocessing/stay_point_detection.py +++ b/pymove/preprocessing/stay_point_detection.py @@ -30,11 +30,11 @@ @timer_decorator def create_or_update_move_stop_by_dist_time( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - dist_radius: Optional[float] = 30, - time_radius: Optional[float] = 900, - label_id: Optional[Text] = TRAJ_ID, - new_label: Optional[Text] = SEGMENT_STOP, - inplace: Optional[bool] = False + dist_radius: float = 30, + time_radius: float = 900, + label_id: Text = TRAJ_ID, + new_label: Text = SEGMENT_STOP, + inplace: bool = False ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Determines the stops and moves points of the dataframe. @@ -113,10 +113,10 @@ def create_or_update_move_stop_by_dist_time( @timer_decorator def create_or_update_move_and_stop_by_radius( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - radius: Optional[float] = 0, - target_label: Optional[Text] = DIST_TO_PREV, - new_label: Optional[Text] = SITUATION, - inplace: Optional[bool] = False, + radius: float = 0, + target_label: Text = DIST_TO_PREV, + new_label: Text = SITUATION, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Finds the stops and moves points of the dataframe. 
diff --git a/pymove/query/query.py b/pymove/query/query.py index 6dd641f5..3ea17ee2 100644 --- a/pymove/query/query.py +++ b/pymove/query/query.py @@ -6,7 +6,7 @@ """ -from typing import Optional, Text +from typing import Text import numpy as np import pandas as pd @@ -20,12 +20,12 @@ def range_query( traj: DataFrame, move_df: DataFrame, - _id: Optional[Text] = TRAJ_ID, - min_dist: Optional[float] = 1000, - distance: Optional[Text] = MEDP, - latitude: Optional[Text] = LATITUDE, - longitude: Optional[Text] = LONGITUDE, - datetime: Optional[Text] = DATETIME + _id: Text = TRAJ_ID, + min_dist: float = 1000, + distance: Text = MEDP, + latitude: Text = LATITUDE, + longitude: Text = LONGITUDE, + datetime: Text = DATETIME ) -> DataFrame: """ Returns all trajectories that have a distance equal to or less than the trajectory. @@ -93,12 +93,12 @@ def dist_measure(traj, this, latitude, longitude, datetime): def knn_query( traj: DataFrame, move_df: DataFrame, - k: Optional[int] = 5, - id_: Optional[Text] = TRAJ_ID, - distance: Optional[Text] = MEDP, - latitude: Optional[Text] = LATITUDE, - longitude: Optional[Text] = LONGITUDE, - datetime: Optional[Text] = DATETIME + k: int = 5, + id_: Text = TRAJ_ID, + distance: Text = MEDP, + latitude: Text = LATITUDE, + longitude: Text = LONGITUDE, + datetime: Text = DATETIME ) -> DataFrame: """ Returns the k neighboring trajectories closest to the trajectory. 
diff --git a/pymove/semantic/semantic.py b/pymove/semantic/semantic.py index 6416ab62..ab69f96d 100644 --- a/pymove/semantic/semantic.py +++ b/pymove/semantic/semantic.py @@ -109,10 +109,10 @@ def _process_simple_filter( @timer_decorator def outliers( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - jump_coefficient: Optional[float] = 3.0, - threshold: Optional[float] = 1, - new_label: Optional[Text] = OUTLIER, - inplace: Optional[bool] = False + jump_coefficient: float = 3.0, + threshold: float = 1, + new_label: Text = OUTLIER, + inplace: bool = False ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Create or update a boolean feature to detect outliers. @@ -174,8 +174,8 @@ def outliers( def create_or_update_out_of_the_bbox( move_data: DataFrame, bbox: Tuple[int, int, int, int], - new_label: Optional[Text] = OUT_BBOX, - inplace: Optional[bool] = False + new_label: Text = OUT_BBOX, + inplace: bool = False ) -> Optional[DataFrame]: """ Create or update a boolean feature to detect points out of the bbox. 
@@ -201,6 +201,10 @@ def create_or_update_out_of_the_bbox( Returns dataframe with a boolean feature with detected points out of the bbox, or None + Raises + ------ + ValueError + If feature generation fails """ if not inplace: move_data = move_data.copy() @@ -208,8 +212,12 @@ def create_or_update_out_of_the_bbox( logger.debug('\nCreate or update boolean feature to detect points out of the bbox') filtered_ = filters.by_bbox(move_data, bbox, filter_out=True) + if filtered_ is None: + raise ValueError('Filter bbox failed!') + logger.debug('...Creating a new label named as %s' % new_label) move_data[new_label] = False + if filtered_.shape[0] > 0: logger.debug('...Setting % as True\n' % new_label) move_data.at[filtered_.index, new_label] = True @@ -222,9 +230,9 @@ def create_or_update_out_of_the_bbox( @timer_decorator def create_or_update_gps_deactivated_signal( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - max_time_between_adj_points: Optional[float] = 7200, - new_label: Optional[Text] = DEACTIVATED, - inplace: Optional[bool] = False + max_time_between_adj_points: float = 7200, + new_label: Text = DEACTIVATED, + inplace: bool = False ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Creates a new feature that inform if point invalid. @@ -272,9 +280,9 @@ def create_or_update_gps_deactivated_signal( @timer_decorator def create_or_update_gps_jump( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - max_dist_between_adj_points: Optional[float] = 3000, - new_label: Optional[Text] = JUMP, - inplace: Optional[bool] = False + max_dist_between_adj_points: float = 3000, + new_label: Text = JUMP, + inplace: bool = False ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Creates a new feature that inform if point is a gps jump. 
@@ -321,13 +329,13 @@ def create_or_update_gps_jump( @timer_decorator def create_or_update_short_trajectory( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - max_dist_between_adj_points: Optional[float] = 3000, - max_time_between_adj_points: Optional[float] = 7200, - max_speed_between_adj_points: Optional[float] = 50, - k_segment_max: Optional[int] = 50, - label_tid: Optional[Text] = TID_PART, - new_label: Optional[Text] = SHORT, - inplace: Optional[bool] = False + max_dist_between_adj_points: float = 3000, + max_time_between_adj_points: float = 7200, + max_speed_between_adj_points: float = 50, + k_segment_max: int = 50, + label_tid: Text = TID_PART, + new_label: Text = SHORT, + inplace: bool = False ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Creates a new feature that inform if point belongs to a short trajectory. @@ -390,10 +398,10 @@ def create_or_update_short_trajectory( @timer_decorator def create_or_update_gps_block_signal( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - max_time_stop: Optional[float] = 7200, - new_label: Optional[Text] = BLOCK, - label_tid: Optional[Text] = TID_PART, - inplace: Optional[bool] = False + max_time_stop: float = 7200, + new_label: Text = BLOCK, + label_tid: Text = TID_PART, + inplace: bool = False ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Creates a new feature that inform segments with periods without moving. 
@@ -453,11 +461,11 @@ def create_or_update_gps_block_signal( @timer_decorator def filter_block_signal_by_repeated_amount_of_points( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - amount_max_of_points_stop: Optional[float] = 30.0, - max_time_stop: Optional[float] = 7200, - filter_out: Optional[bool] = False, - label_tid: Optional[Text] = TID_PART, - inplace: Optional[bool] = False + amount_max_of_points_stop: float = 30.0, + max_time_stop: float = 7200, + filter_out: bool = False, + label_tid: Text = TID_PART, + inplace: bool = False ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Filters from dataframe points with blocked signal by amount of points. @@ -514,10 +522,10 @@ def filter_block_signal_by_repeated_amount_of_points( @timer_decorator def filter_block_signal_by_time( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - max_time_stop: Optional[float] = 7200, - filter_out: Optional[bool] = False, - label_tid: Optional[Text] = TID_PART, - inplace: Optional[bool] = False + max_time_stop: float = 7200, + filter_out: bool = False, + label_tid: Text = TID_PART, + inplace: bool = False ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Filters from dataframe points with blocked signal by time. 
@@ -575,12 +583,12 @@ def filter_block_signal_by_time( @timer_decorator def filter_longer_time_to_stop_segment_by_id( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - dist_radius: Optional[float] = 30, - time_radius: Optional[float] = 900, - label_id: Optional[Text] = TRAJ_ID, - label_segment_stop: Optional[Text] = SEGMENT_STOP, - filter_out: Optional[bool] = False, - inplace: Optional[bool] = False + dist_radius: float = 30, + time_radius: float = 900, + label_id: Text = TRAJ_ID, + label_segment_stop: Text = SEGMENT_STOP, + filter_out: bool = False, + inplace: bool = False ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Filters from dataframe segment with longest stop time. diff --git a/pymove/tests/test_utils_mem.py b/pymove/tests/test_utils_mem.py index d9044f7f..65042f32 100644 --- a/pymove/tests/test_utils_mem.py +++ b/pymove/tests/test_utils_mem.py @@ -29,15 +29,13 @@ def test_reduce_mem_usage_automatic(): move_df = _default_move_df() - expected_initial_size = 280 - - expected_final_size = 232 - - assert abs(mem.total_size(move_df) - expected_initial_size) <= 20 + initial_size = mem.total_size(move_df) mem.reduce_mem_usage_automatic(move_df) - assert abs(mem.total_size(move_df) - expected_final_size) <= 20 + final_size = mem.total_size(move_df) + + assert initial_size > final_size def test_total_size(): diff --git a/pymove/utils/conversions.py b/pymove/utils/conversions.py index b17460c1..9cb45b84 100644 --- a/pymove/utils/conversions.py +++ b/pymove/utils/conversions.py @@ -30,6 +30,7 @@ from typing import TYPE_CHECKING, List, Optional, Text, Union import numpy as np +from numpy import ndarray from pandas import DataFrame from shapely.geometry import Point @@ -81,7 +82,7 @@ def lat_meters(lat: float) -> float: def meters_to_eps( - radius_meters: float, earth_radius: Optional[float] = EARTH_RADIUS + radius_meters: float, earth_radius: float = EARTH_RADIUS ) -> float: """ Converts radius in meters to eps. 
@@ -108,7 +109,7 @@ def meters_to_eps( return radius_meters / earth_radius -def list_to_str(input_list: List, delimiter: Optional[Text] = ',') -> Text: +def list_to_str(input_list: List, delimiter: Text = ',') -> Text: """ Concatenates a list elements, joining them by the separator `delimiter`. @@ -191,7 +192,7 @@ def list_to_svm_line(original_list: List) -> Text: return svm_line.rstrip() -def lon_to_x_spherical(lon: float) -> float: +def lon_to_x_spherical(lon: Union[float, ndarray]) -> Union[float, ndarray]: """ Convert longitude to X EPSG:3857 WGS 84/Pseudo-Mercator. @@ -221,7 +222,7 @@ def lon_to_x_spherical(lon: float) -> float: return 6378137 * np.radians(lon) -def lat_to_y_spherical(lat: float) -> float: +def lat_to_y_spherical(lat: Union[float, ndarray]) -> Union[float, ndarray]: """ Convert latitude to Y EPSG:3857 WGS 84/Pseudo-Mercator. @@ -251,7 +252,7 @@ def lat_to_y_spherical(lat: float) -> float: return 6378137 * np.log(np.tan(np.pi / 4 + np.radians(lat) / 2.0)) -def x_to_lon_spherical(x: float) -> float: +def x_to_lon_spherical(x: Union[float, ndarray]) -> Union[float, ndarray]: """ Convert X EPSG:3857 WGS 84 / Pseudo-Mercator to longitude. @@ -280,7 +281,7 @@ def x_to_lon_spherical(x: float) -> float: return np.degrees(x / 6378137.0) -def y_to_lat_spherical(y: float) -> float: +def y_to_lat_spherical(y: Union[float, ndarray]) -> Union[float, ndarray]: """ Convert Y EPSG:3857 WGS 84 / Pseudo-Mercator to latitude. @@ -311,9 +312,9 @@ def y_to_lat_spherical(y: float) -> float: def geometry_points_to_lat_and_lon( move_data: DataFrame, - geometry_label: Optional[Text] = GEOMETRY, - drop_geometry: Optional[bool] = False, - inplace: Optional[bool] = False + geometry_label: Text = GEOMETRY, + drop_geometry: bool = False, + inplace: bool = False ) -> DataFrame: """ Creates lat and lon columns from Points in geometry column. 
@@ -368,8 +369,8 @@ def geometry_points_to_lat_and_lon( def lat_and_lon_decimal_degrees_to_decimal( move_data: DataFrame, - latitude: Optional[Text] = LATITUDE, - longitude: Optional[Text] = LONGITUDE + latitude: Text = LATITUDE, + longitude: Text = LONGITUDE ) -> DataFrame: """ Converts latitude and longitude format from decimal degrees to decimal format. @@ -419,9 +420,9 @@ def _decimal_degree_to_decimal(row): def ms_to_kmh( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_speed: Optional[Text] = SPEED_TO_PREV, - new_label: Optional[Text] = None, - inplace: Optional[bool] = False, + label_speed: Text = SPEED_TO_PREV, + new_label: Text = None, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Convert values, in ms, in label_speed column to kmh. @@ -496,9 +497,9 @@ def ms_to_kmh( def kmh_to_ms( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_speed: Optional[Text] = SPEED_TO_PREV, + label_speed: Text = SPEED_TO_PREV, new_label: Optional[Text] = None, - inplace: Optional[bool] = False, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Convert values, in kmh, in label_speed column to ms. @@ -567,9 +568,9 @@ def kmh_to_ms( def meters_to_kilometers( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_distance: Optional[Text] = DIST_TO_PREV, + label_distance: Text = DIST_TO_PREV, new_label: Optional[Text] = None, - inplace: Optional[bool] = False, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Convert values, in meters, in label_distance column to kilometers. 
@@ -637,9 +638,9 @@ def meters_to_kilometers( def kilometers_to_meters( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_distance: Optional[Text] = DIST_TO_PREV, + label_distance: Text = DIST_TO_PREV, new_label: Optional[Text] = None, - inplace: Optional[bool] = False, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Convert values, in kilometers, in label_distance column to meters. @@ -708,9 +709,9 @@ def kilometers_to_meters( def seconds_to_minutes( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_time: Optional[Text] = TIME_TO_PREV, + label_time: Text = TIME_TO_PREV, new_label: Optional[Text] = None, - inplace: Optional[bool] = False, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Convert values, in seconds, in label_distance column to minutes. @@ -778,9 +779,9 @@ def seconds_to_minutes( def minute_to_seconds( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_time: Optional[Text] = TIME_TO_PREV, + label_time: Text = TIME_TO_PREV, new_label: Optional[Text] = None, - inplace: Optional[bool] = False, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Convert values, in minutes, in label_distance column to seconds. @@ -849,9 +850,9 @@ def minute_to_seconds( def minute_to_hours( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_time: Optional[Text] = TIME_TO_PREV, + label_time: Text = TIME_TO_PREV, new_label: Optional[Text] = None, - inplace: Optional[bool] = False, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Convert values, in minutes, in label_distance column to hours. 
@@ -921,9 +922,9 @@ def minute_to_hours( def hours_to_minute( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_time: Optional[Text] = TIME_TO_PREV, + label_time: Text = TIME_TO_PREV, new_label: Optional[Text] = None, - inplace: Optional[bool] = False, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Convert values, in hours, in label_distance column to minute. @@ -992,9 +993,9 @@ def hours_to_minute( def seconds_to_hours( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_time: Optional[Text] = TIME_TO_PREV, + label_time: Text = TIME_TO_PREV, new_label: Optional[Text] = None, - inplace: Optional[bool] = False, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Convert values, in seconds, in label_distance column to hours. @@ -1063,9 +1064,9 @@ def seconds_to_hours( def hours_to_seconds( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_time: Optional[Text] = TIME_TO_PREV, + label_time: Text = TIME_TO_PREV, new_label: Optional[Text] = None, - inplace: Optional[bool] = False, + inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ Convert values, in hours, in label_distance column to seconds. diff --git a/pymove/utils/data_augmentation.py b/pymove/utils/data_augmentation.py index 87c45960..d5f2e1b5 100644 --- a/pymove/utils/data_augmentation.py +++ b/pymove/utils/data_augmentation.py @@ -28,7 +28,9 @@ def append_row( - data: DataFrame, row: Optional[Series] = None, columns: Optional[Dict] = None + data: DataFrame, + row: Optional[Series] = None, + columns: Optional[Dict] = None ): """ Insert a new line in the dataframe with the information passed by parameter. 
@@ -103,7 +105,7 @@ def generate_trajectories_df( def generate_start_feature( - data: DataFrame, label_trajectory: Optional[Text] = TRAJECTORY + data: DataFrame, label_trajectory: Text = TRAJECTORY ): """ Removes the last point from the trajectory and adds it in a new column 'destiny'. @@ -123,7 +125,7 @@ def generate_start_feature( def generate_destiny_feature( - data: DataFrame, label_trajectory: Optional[Text] = TRAJECTORY + data: DataFrame, label_trajectory: Text = TRAJECTORY ): """ Removes the first point from the trajectory and adds it in a new column 'start'. @@ -143,7 +145,7 @@ def generate_destiny_feature( def split_crossover( - sequence_a: List, sequence_b: List, frac: Optional[float] = 0.5 + sequence_a: List, sequence_b: List, frac: float = 0.5 ) -> Tuple[List, List]: """ Divides two arrays in the indicated ratio and exchange their halves. @@ -178,7 +180,7 @@ def split_crossover( return sequence_a, sequence_b -def _augmentation(data: DataFrame, aug_df: DataFrame, frac: Optional[float] = 0.5): +def _augmentation(data: DataFrame, aug_df: DataFrame, frac: float = 0.5): """ Generates new data with unobserved trajectories. @@ -223,8 +225,8 @@ def _augmentation(data: DataFrame, aug_df: DataFrame, frac: Optional[float] = 0. value2 = data.at[idx_, col] if isinstance(value1, str) and isinstance(value2, str): - sequences1.append(value1 + '_' + value2) - sequences2.append(value2 + '_' + value1) + sequences1.append(value1 + '_' + value2) # type: ignore + sequences2.append(value2 + '_' + value1) # type: ignore else: sequences1.append(value1) sequences2.append(value2) @@ -238,10 +240,10 @@ def _augmentation(data: DataFrame, aug_df: DataFrame, frac: Optional[float] = 0. 
def augmentation_trajectories_df( data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - restriction: Optional[Text] = 'destination only', - label_trajectory: Optional[Text] = TRAJECTORY, - insert_at_df: Optional[bool] = False, - frac: Optional[float] = 0.5, + restriction: Text = 'destination only', + label_trajectory: Text = TRAJECTORY, + insert_at_df: bool = False, + frac: float = 0.5, ) -> DataFrame: """ Generates new data from unobserved trajectories, given a specific restriction. @@ -334,9 +336,9 @@ def insert_points_in_df(data: DataFrame, aug_df: DataFrame): def instance_crossover_augmentation( data: DataFrame, - restriction: Optional[Text] = 'destination only', - label_trajectory: Optional[Text] = TRAJECTORY, - frac: Optional[float] = 0.5 + restriction: Text = 'destination only', + label_trajectory: Text = TRAJECTORY, + frac: float = 0.5 ): """ Generates new data from unobserved trajectories, with a specific restriction. diff --git a/pymove/utils/datetime.py b/pymove/utils/datetime.py index 523ffdac..7bc6a61a 100644 --- a/pymove/utils/datetime.py +++ b/pymove/utils/datetime.py @@ -22,7 +22,7 @@ """ -from datetime import date, datetime +from datetime import datetime from typing import Optional, Text, Union import holidays @@ -222,7 +222,7 @@ def to_day_of_week_int(dt: datetime) -> int: def working_day( dt: Union[Text, datetime], - country: Optional[Text] = 'BR', + country: Text = 'BR', state: Optional[Text] = None ) -> bool: """ @@ -267,7 +267,7 @@ def working_day( dt = str_to_datetime(dt) if isinstance(dt, datetime): - dt = date(dt.year, dt.month, dt.day) + dt = datetime(dt.year, dt.month, dt.day) if dt in holidays.CountryHoliday(country=country, prov=None, state=state): result = False @@ -489,11 +489,11 @@ def diff_time(start_time: datetime, end_time: datetime) -> int: def create_time_slot_in_minute( data: DataFrame, - slot_interval: Optional[int] = 15, - initial_slot: Optional[int] = 0, - label_datetime: Optional[Text] = DATETIME, - label_time_slot: 
Optional[Text] = TIME_SLOT, - inplace: Optional[bool] = False + slot_interval: int = 15, + initial_slot: int = 0, + label_datetime: Text = DATETIME, + label_time_slot: Text = TIME_SLOT, + inplace: bool = False ) -> Optional[DataFrame]: """ Partitions the time in slot windows. @@ -548,7 +548,7 @@ def create_time_slot_in_minute( def generate_time_statistics( data: DataFrame, - local_label: Optional[Text] = LOCAL_LABEL + local_label: Text = LOCAL_LABEL ): """ Calculates time statistics of the pairwise local labels. @@ -637,9 +637,9 @@ def _calc_time_threshold(seg_mean: float, seg_std: float) -> float: def threshold_time_statistics( df_statistics: DataFrame, - mean_coef: Optional[float] = 1.0, - std_coef: Optional[float] = 1.0, - inplace: Optional[bool] = False + mean_coef: float = 1.0, + std_coef: float = 1.0, + inplace: bool = False ) -> Optional[DataFrame]: """ Calculates and creates the threshold column. diff --git a/pymove/utils/distances.py b/pymove/utils/distances.py index b21d6b76..ca9ef88c 100644 --- a/pymove/utils/distances.py +++ b/pymove/utils/distances.py @@ -8,7 +8,7 @@ medt """ -from typing import Optional, Text, Union +from typing import Text, Union import numpy as np import pandas as pd @@ -25,8 +25,8 @@ def haversine( lon1: Union[float, ndarray], lat2: Union[float, ndarray], lon2: Union[float, ndarray], - to_radians: Optional[bool] = True, - earth_radius: Optional[float] = EARTH_RADIUS + to_radians: bool = True, + earth_radius: float = EARTH_RADIUS ) -> Union[float, ndarray]: """ Calculates the great circle distance between two points on the earth. 
@@ -72,7 +72,7 @@ def haversine( """ if to_radians: - lat1, lon1, lat2, lon2 = np.radians([lat1, lon1, lat2, lon2]) + lat1, lon1, lat2, lon2 = np.radians([lat1, lon1, lat2, lon2]) # type: ignore a = ( np.sin((lat2 - lat1) / 2.0) ** 2 + np.cos(lat1) @@ -130,8 +130,8 @@ def euclidean_distance_in_meters( def nearest_points( traj1: DataFrame, traj2: DataFrame, - latitude: Optional[Text] = LATITUDE, - longitude: Optional[Text] = LONGITUDE, + latitude: Text = LATITUDE, + longitude: Text = LONGITUDE, ) -> DataFrame: """ Returns the point closest to another trajectory based on the Euclidean distance. @@ -191,8 +191,8 @@ def nearest_points( def medp( traj1: DataFrame, traj2: DataFrame, - latitude: Optional[Text] = LATITUDE, - longitude: Optional[Text] = LONGITUDE + latitude: Text = LATITUDE, + longitude: Text = LONGITUDE ) -> float: """ Returns the Mean Euclidian Distance Predictive between two trajectories. @@ -243,9 +243,9 @@ def medp( def medt( traj1: DataFrame, traj2: DataFrame, - latitude: Optional[Text] = LATITUDE, - longitude: Optional[Text] = LONGITUDE, - datetime: Optional[Text] = DATETIME + latitude: Text = LATITUDE, + longitude: Text = LONGITUDE, + datetime: Text = DATETIME ) -> float: """ Returns the Mean Euclidian Distance Trajectory between two trajectories. @@ -286,7 +286,7 @@ def medt( >>> medt(traj_1, traj_2) 6.592419887747872e-05 """ - soma = 0 + soma = 0. 
proportion = 1000000000 if(len(traj2) < len(traj1)): traj1, traj2 = traj2, traj1 @@ -307,6 +307,7 @@ def medt( soma = soma + this_distance for j in range(len(traj1) + 1, len(traj2)): soma = soma + \ - float(utils.datetime.timestamp_to_millis( - traj2[datetime].iloc[j])) / proportion + float( + utils.datetime.timestamp_to_millis(traj2[datetime].iloc[j]) + ) / proportion return soma diff --git a/pymove/utils/geoutils.py b/pymove/utils/geoutils.py index 8f068eb8..0b414165 100644 --- a/pymove/utils/geoutils.py +++ b/pymove/utils/geoutils.py @@ -8,7 +8,7 @@ """ -from typing import Optional, Text, Tuple +from typing import Text, Tuple import geohash2 as gh import numpy as np @@ -69,7 +69,7 @@ def v_color(ob: BaseGeometry) -> Text: return COLORS[ob.is_simple + 33] -def _encode(lat: float, lon: float, precision: Optional[float] = 15) -> Text: +def _encode(lat: float, lon: float, precision: float = 15) -> Text: """ Encodes latitude/longitude to geohash. @@ -131,7 +131,7 @@ def _decode(geohash: Text) -> Tuple[float, float]: return gh.decode(geohash) -def _bin_geohash(lat: float, lon: float, precision: Optional[float] = 15) -> ndarray: +def _bin_geohash(lat: float, lon: float, precision: float = 15) -> ndarray: """ Transforms a point's geohash into a binary array. @@ -168,7 +168,7 @@ def _bin_geohash(lat: float, lon: float, precision: Optional[float] = 15) -> nda def _reset_and_create_arrays_none( - data: DataFrame, reset_index: Optional[bool] = True + data: DataFrame, reset_index: bool = True ) -> Tuple[ndarray, ndarray, ndarray, ndarray]: """ Reset the df index and create arrays of none values. @@ -218,7 +218,7 @@ def _reset_and_create_arrays_none( return latitudes, longitudes, geohash, bin_geohash -def create_geohash_df(data: DataFrame, precision: Optional[float] = 15): +def create_geohash_df(data: DataFrame, precision: float = 15): """ Create geohash from geographic coordinates and integrate with df. 
@@ -263,7 +263,7 @@ def create_geohash_df(data: DataFrame, precision: Optional[float] = 15): data[GEOHASH] = geohash -def create_bin_geohash_df(data: DataFrame, precision: Optional[float] = 15): +def create_bin_geohash_df(data: DataFrame, precision: float = 15): """ Create trajectory geohash binaries and integrate with df. @@ -310,8 +310,8 @@ def create_bin_geohash_df(data: DataFrame, precision: Optional[float] = 15): def decode_geohash_to_latlon( data: DataFrame, - label_geohash: Optional[Text] = GEOHASH, - reset_index: Optional[bool] = True + label_geohash: Text = GEOHASH, + reset_index: bool = True ): """ Decode feature with hash of trajectories back to geographic coordinates. diff --git a/pymove/utils/integration.py b/pymove/utils/integration.py index fd320af9..7fdf07e8 100644 --- a/pymove/utils/integration.py +++ b/pymove/utils/integration.py @@ -49,7 +49,7 @@ from pymove.utils.log import logger, progress_bar -def union_poi_bank(data: DataFrame, label_poi: Optional[Text] = TYPE_POI): +def union_poi_bank(data: DataFrame, label_poi: Text = TYPE_POI): """ Performs the union between the different bank categories. @@ -76,7 +76,7 @@ def union_poi_bank(data: DataFrame, label_poi: Optional[Text] = TYPE_POI): data.at[data[filter_bank].index, label_poi] = 'banks' -def union_poi_bus_station(data: DataFrame, label_poi: Optional[Text] = TYPE_POI): +def union_poi_bus_station(data: DataFrame, label_poi: Text = TYPE_POI): """ Performs the union between the different bus station categories. @@ -97,7 +97,7 @@ def union_poi_bus_station(data: DataFrame, label_poi: Optional[Text] = TYPE_POI) data.at[data[filter_bus_station].index, label_poi] = 'bus_station' -def union_poi_bar_restaurant(data: DataFrame, label_poi: Optional[Text] = TYPE_POI): +def union_poi_bar_restaurant(data: DataFrame, label_poi: Text = TYPE_POI): """ Performs the union between bar and restaurant categories. 
@@ -116,7 +116,7 @@ def union_poi_bar_restaurant(data: DataFrame, label_poi: Optional[Text] = TYPE_P data.at[data[filter_bar_restaurant].index, label_poi] = 'bar-restaurant' -def union_poi_parks(data: DataFrame, label_poi: Optional[Text] = TYPE_POI): +def union_poi_parks(data: DataFrame, label_poi: Text = TYPE_POI): """ Performs the union between park categories. @@ -135,7 +135,7 @@ def union_poi_parks(data: DataFrame, label_poi: Optional[Text] = TYPE_POI): data.at[data[filter_parks].index, label_poi] = 'parks' -def union_poi_police(data: DataFrame, label_poi: Optional[Text] = TYPE_POI): +def union_poi_police(data: DataFrame, label_poi: Text = TYPE_POI): """ Performs the union between police categories. @@ -155,7 +155,7 @@ def union_poi_police(data: DataFrame, label_poi: Optional[Text] = TYPE_POI): def join_collective_areas( - gdf_: DataFrame, gdf_rules_: DataFrame, label_geometry: Optional[Text] = GEOMETRY + gdf_: DataFrame, gdf_rules_: DataFrame, label_geometry: Text = GEOMETRY ): """ Performs the integration between trajectories and collective areas. @@ -187,9 +187,9 @@ def join_collective_areas( def _reset_and_creates_id_and_lat_lon( data: DataFrame, df_pois: DataFrame, - lat_lon_poi: Optional[bool] = True, - reset_index: Optional[bool] = True -) -> Tuple[ndarray, ndarray, ndarray, ndarray]: + lat_lon_poi: bool = True, + reset_index: bool = True +) -> Tuple[ndarray, ndarray, ndarray, ndarray, ndarray]: """ Resets the indexes of the dataframes. @@ -284,7 +284,7 @@ def _reset_set_window__and_creates_event_id_type( def _reset_set_window_and_creates_event_id_type_all( - data: DataFrame, df_events: DataFrame, label_date: Text, time_window: int + data: DataFrame, df_events: DataFrame, label_date: Text, time_window: float ) -> Tuple[Series, Series, ndarray, ndarray, ndarray]: """ Resets the indexes of the dataframes. @@ -301,7 +301,7 @@ def _reset_set_window_and_creates_event_id_type_all( The input event point of interest data. 
label_date : str Label of data referring to the datetime. - time_window : Int + time_window : float Number of seconds of the time window. Returns @@ -330,9 +330,9 @@ def _reset_set_window_and_creates_event_id_type_all( def join_with_pois( data: DataFrame, df_pois: DataFrame, - label_id: Optional[Text] = TRAJ_ID, - label_poi_name: Optional[Text] = NAME_POI, - reset_index: Optional[Text] = True + label_id: Text = TRAJ_ID, + label_poi_name: Text = NAME_POI, + reset_index: bool = True ): """ Performs the integration between trajectories and points of interest. @@ -394,10 +394,10 @@ def join_with_pois( def join_with_pois_optimizer( data, df_pois: DataFrame, - label_id: Optional[Text] = TRAJ_ID, - label_poi_name: Optional[Text] = NAME_POI, + label_id: Text = TRAJ_ID, + label_poi_name: Text = NAME_POI, dist_poi: Optional[List] = None, - reset_index: Optional[Text] = True + reset_index: bool = True ): """ Performs the integration between trajectories and points of interest. @@ -426,6 +426,8 @@ def join_with_pois_optimizer( by default True """ + if dist_poi is None: + dist_poi = [] if len(df_pois[label_poi_name].unique()) == len(dist_poi): values = _reset_and_creates_id_and_lat_lon(data, df_pois, False, reset_index) minimum_distances, ids_pois, tag_pois, lat_poi, lon_poi = values @@ -444,7 +446,7 @@ def join_with_pois_optimizer( # First iteration is minimum distances if idx == 0: - minimum_distances = np.float64( + minimum_distances = np.array( haversine( lat_poi, lon_poi, @@ -486,8 +488,8 @@ def join_with_pois_optimizer( def join_with_pois_by_category( data: DataFrame, df_pois: DataFrame, - label_category: Optional[Text] = TYPE_POI, - label_id: Optional[Text] = TRAJ_ID + label_category: Text = TYPE_POI, + label_id: Text = TRAJ_ID ): """ Performs the integration between trajectories and points of interest. 
@@ -561,10 +563,10 @@ def join_with_pois_by_category( def join_with_poi_datetime( data: DataFrame, df_events: DataFrame, - label_date: Optional[Text] = DATETIME, - time_window: Optional[int] = 900, - label_event_id: Optional[Text] = EVENT_ID, - label_event_type: Optional[Text] = EVENT_TYPE + label_date: Text = DATETIME, + time_window: int = 900, + label_event_id: Text = EVENT_ID, + label_event_type: Text = EVENT_TYPE ): """ Performs the integration between trajectories and points of interest. @@ -591,6 +593,11 @@ def join_with_poi_datetime( label_event_type : str, optional Label of df_events referring to the type of the event, by default EVENT_TYPE + Raises + ------ + ValueError + If feature generation fails + """ values = _reset_set_window__and_creates_event_id_type( data, df_events, label_date, time_window @@ -602,6 +609,10 @@ def join_with_poi_datetime( df_filtered = filters.by_datetime( df_events, window_starts[idx], window_ends[idx] ) + + if df_filtered is None: + raise ValueError('Filter datetime failed!') + size_filter = df_filtered.shape[0] if size_filter > 0: @@ -638,10 +649,10 @@ def join_with_poi_datetime( def join_with_poi_datetime_optimizer( data: DataFrame, df_events: DataFrame, - label_date: Optional[Text] = DATETIME, - time_window: Optional[int] = 900, - label_event_id: Optional[Text] = EVENT_ID, - label_event_type: Optional[Text] = EVENT_TYPE + label_date: Text = DATETIME, + time_window: int = 900, + label_event_id: Text = EVENT_ID, + label_event_type: Text = EVENT_TYPE ): """ Performs a optimized integration between trajectories and points of events. 
@@ -668,6 +679,11 @@ def join_with_poi_datetime_optimizer( label_event_type : str, optional Label of df_events referring to the type of the event, by default EVENT_TYPE + Raises + ------ + ValueError + If feature generation fails + """ values = _reset_set_window__and_creates_event_id_type( data, df_events, label_date, time_window @@ -691,6 +707,9 @@ def join_with_poi_datetime_optimizer( data, window_starts[idx], window_ends[idx] ) + if df_filtered is None: + raise ValueError('Filtering datetime failed!') + size_filter = df_filtered.shape[0] if size_filter > 0: @@ -737,11 +756,11 @@ def join_with_poi_datetime_optimizer( def join_with_pois_by_dist_and_datetime( data: DataFrame, df_pois: DataFrame, - label_date: Optional[Text] = DATETIME, - label_event_id: Optional[Text] = EVENT_ID, - label_event_type: Optional[Text] = EVENT_TYPE, - time_window: Optional[float] = 3600, - radius: Optional[float] = 1000, + label_date: Text = DATETIME, + label_event_id: Text = EVENT_ID, + label_event_type: Text = EVENT_TYPE, + time_window: float = 3600, + radius: float = 1000, ): """ Performs the integration between trajectories and points of interest. 
@@ -769,6 +788,11 @@ def join_with_pois_by_dist_and_datetime( radius: float, optional maximum radius of pois, by default 1000 + Raises + ------ + ValueError + If feature generation fails + """ if label_date not in df_pois: raise KeyError("POI's DataFrame must contain a %s column" % label_date) @@ -789,9 +813,12 @@ def join_with_pois_by_dist_and_datetime( # filter event by radius df_filtered = filters.by_bbox( - df_pois, bbox + df_pois, bbox, inplace=False ) + if df_filtered is None: + raise ValueError('Filtering bbox failed') + # filter event by datetime filters.by_datetime( df_filtered, @@ -836,10 +863,10 @@ def join_with_pois_by_dist_and_datetime( def join_with_home_by_id( data: DataFrame, df_home: DataFrame, - label_id: Optional[Text] = TRAJ_ID, - label_address: Optional[Text] = ADDRESS, - label_city: Optional[Text] = CITY, - drop_id_without_home: Optional[bool] = False, + label_id: Text = TRAJ_ID, + label_address: Text = ADDRESS, + label_city: Text = CITY, + drop_id_without_home: bool = False, ): """ Performs the integration between trajectories and home points. @@ -913,12 +940,12 @@ def join_with_home_by_id( def merge_home_with_poi( data: DataFrame, - label_dist_poi: Optional[Text] = DIST_POI, - label_name_poi: Optional[Text] = NAME_POI, - label_id_poi: Optional[Text] = ID_POI, - label_home: Optional[Text] = HOME, - label_dist_home: Optional[Text] = DIST_HOME, - drop_columns: Optional[bool] = True, + label_dist_poi: Text = DIST_POI, + label_name_poi: Text = NAME_POI, + label_id_poi: Text = ID_POI, + label_home: Text = HOME, + label_dist_home: Text = DIST_HOME, + drop_columns: bool = True, ): """ Performs or merges the points of interest and the trajectories. 
diff --git a/pymove/utils/log.py b/pymove/utils/log.py index 053f5f0d..c8fed867 100644 --- a/pymove/utils/log.py +++ b/pymove/utils/log.py @@ -34,7 +34,7 @@ def set_verbosity(level): shell_handler.setLevel(level) -def timer_decorator(func: Callable) -> wraps: +def timer_decorator(func: Callable) -> Callable: """A decorator that prints how long a function took to run.""" @wraps(func) @@ -75,7 +75,7 @@ def _log_progress( is_iterator = False if total is None: try: - total = len(sequence) + total = len(sequence) # type: ignore except TypeError: is_iterator = True if total is not None: diff --git a/pymove/utils/math.py b/pymove/utils/math.py index 0fb4019b..14dd4b94 100644 --- a/pymove/utils/math.py +++ b/pymove/utils/math.py @@ -203,7 +203,7 @@ def arrays_avg( 'values_array and qt_array must have the same number of rows' ) - result = 0 + result = 0. for i, j in zip(values_array, weights_array): result += i * j @@ -238,8 +238,8 @@ def array_stats(values_array: List[float]) -> Tuple[float, float, int]: >>> print(array_stats(list), type(array_stats(list))) (39.5, 327.25, 5) """ - sum_ = 0 - sum_sq = 0 + sum_ = 0. + sum_sq = 0. 
n = 0 for item in values_array: sum_ += item diff --git a/pymove/utils/mem.py b/pymove/utils/mem.py index d14e74f5..f67b26ae 100644 --- a/pymove/utils/mem.py +++ b/pymove/utils/mem.py @@ -11,11 +11,12 @@ """ import os +import re import time from collections import deque from itertools import chain from sys import getsizeof -from typing import Callable, Dict, Optional, Text +from typing import Dict, Text import numpy as np import psutil @@ -55,7 +56,7 @@ def reduce_mem_usage_automatic(df: DataFrame): for col in df.columns: col_type = df[col].dtype - if str(col_type) == 'int': + if re.match('int', str(col_type)): c_min = df[col].min() c_max = df[col].max() if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max: @@ -95,7 +96,7 @@ def reduce_mem_usage_automatic(df: DataFrame): and c_max < np.iinfo(np.uint64).max ): df[col] = df[col].astype(np.uint64) - elif col_type == np.float: + elif re.match('float', str(col_type)): c_min = df[col].min() c_max = df[col].max() if ( @@ -119,7 +120,7 @@ def reduce_mem_usage_automatic(df: DataFrame): def total_size( - o: object, handlers: Dict = None, verbose: Optional[bool] = True + o: object, handlers: Dict = None, verbose: bool = True ) -> float: """ Calculates the approximate memory footprint of an given object. @@ -268,13 +269,13 @@ def end_operation(operation: Dict) -> Dict: } -def sizeof_fmt(mem_usage: int, suffix: Optional[Text] = 'B') -> Text: +def sizeof_fmt(mem_usage: float, suffix: Text = 'B') -> Text: """ Returns the memory usage calculation of the last function. Parameters ---------- - mem_usage : int + mem_usage : float memory usage in bytes suffix: string, optional @@ -300,7 +301,7 @@ def sizeof_fmt(mem_usage: int, suffix: Optional[Text] = 'B') -> Text: def top_mem_vars( - variables: Callable, n: Optional[int] = 10, hide_private=True + variables: Dict, n: int = 10, hide_private=True ) -> DataFrame: """ Shows the sizes of the active variables. 
@@ -332,7 +333,7 @@ def top_mem_vars( 3 top_mem_vars 136.0 B 4 np 72.0 B """ - vars_ = ((name, getsizeof(value)) for name, value in variables.items()) + vars_ = iter([(name, getsizeof(value)) for name, value in variables.items()]) if hide_private: vars_ = filter(lambda x: not x[0].startswith('_'), vars_) top_vars = DataFrame( diff --git a/pymove/utils/trajectories.py b/pymove/utils/trajectories.py index ff822616..7b2036a9 100644 --- a/pymove/utils/trajectories.py +++ b/pymove/utils/trajectories.py @@ -14,7 +14,7 @@ from itertools import chain -from typing import Any, Dict, List, Optional, Text, Union +from typing import Any, Dict, List, Optional, Text, Tuple, Union import numpy as np from numpy import ndarray @@ -29,12 +29,12 @@ def read_csv( filepath_or_buffer: FilePathOrBuffer, - latitude: Optional[Text] = LATITUDE, - longitude: Optional[Text] = LONGITUDE, - datetime: Optional[Text] = DATETIME, - traj_id: Optional[Text] = TRAJ_ID, - type_: Optional[Text] = TYPE_PANDAS, - n_partitions: Optional[int] = 1, + latitude: Text = LATITUDE, + longitude: Text = LONGITUDE, + datetime: Text = DATETIME, + traj_id: Text = TRAJ_ID, + type_: Text = TYPE_PANDAS, + n_partitions: int = 1, **kwargs ) -> MoveDataFrame: """ @@ -119,7 +119,9 @@ def invert_dict(d: Dict) -> Dict: def flatten_dict( - d: Dict, parent_key: Optional[Text] = '', sep: Optional[Text] = '_' + d: Dict, + parent_key: Text = '', + sep: Text = '_' ) -> Dict: """ Flattens a nested dictionary. 
@@ -151,9 +153,9 @@ def flatten_dict( """ if not isinstance(d, dict): return {parent_key: d} - items = [] + items: List[Tuple[Text, Any]] = [] for k, v in d.items(): - new_key = parent_key + sep + k if parent_key else k + new_key = f'{parent_key}{sep}{k}' if parent_key else k if isinstance(v, dict): items.extend(flatten_dict(v, new_key, sep=sep).items()) else: @@ -264,6 +266,8 @@ def shift( [2 3 4 5 6 7 0] """ result = np.empty_like(arr) + arr = np.array(arr) + if fill_value is None: dtype = result.dtype if np.issubdtype(dtype, np.bool_): diff --git a/pymove/utils/visual.py b/pymove/utils/visual.py index 2599960d..2355b7ab 100644 --- a/pymove/utils/visual.py +++ b/pymove/utils/visual.py @@ -11,7 +11,7 @@ """ -from typing import List, Optional, Text, Tuple +from typing import List, Text, Tuple from branca.element import MacroElement, Template from folium import Map @@ -329,7 +329,7 @@ def get_cmap(cmap: Text) -> Colormap: def save_wkt( - move_data: DataFrame, filename: Text, label_id: Optional[Text] = TRAJ_ID + move_data: DataFrame, filename: Text, label_id: Text = TRAJ_ID ): """ Save a visualization in a map in a new file .wkt. diff --git a/pymove/visualization/folium.py b/pymove/visualization/folium.py index aa24b06c..35560a6f 100644 --- a/pymove/visualization/folium.py +++ b/pymove/visualization/folium.py @@ -64,10 +64,10 @@ def save_map( move_data: DataFrame, filename: Text, - tiles: Optional[Text] = TILES[0], - label_id: Optional[Text] = TRAJ_ID, - cmap: Optional[Text] = 'Set1', - return_map: Optional[bool] = False + tiles: Text = TILES[0], + label_id: Text = TRAJ_ID, + cmap: Text = 'Set1', + return_map: bool = False ) -> Optional[Map]: """ Save a visualization in a map in a new file. 
@@ -123,8 +123,8 @@ def create_base_map( move_data: DataFrame, lat_origin: Optional[float] = None, lon_origin: Optional[float] = None, - tile: Optional[Text] = TILES[0], - default_zoom_start: Optional[float] = 12, + tile: Text = TILES[0], + default_zoom_start: float = 12, ) -> Map: """ Generates a folium map. @@ -164,12 +164,12 @@ def heatmap( n_rows: Optional[int] = None, lat_origin: Optional[float] = None, lon_origin: Optional[float] = None, - zoom_start: Optional[float] = 12, - radius: Optional[float] = 8, + zoom_start: float = 12, + radius: float = 8, base_map: Optional[Map] = None, - tile: Optional[Text] = TILES[0], - save_as_html: Optional[bool] = False, - filename: Optional[Text] = 'heatmap.html', + tile: Text = TILES[0], + save_as_html: bool = False, + filename: Text = 'heatmap.html', ) -> Map: """ Generate visualization of Heat Map using folium plugin. @@ -237,14 +237,14 @@ def heatmap_with_time( n_rows: Optional[int] = None, lat_origin: Optional[float] = None, lon_origin: Optional[float] = None, - zoom_start: Optional[float] = 12, - radius: Optional[float] = 8, - min_opacity: Optional[float] = 0.5, - max_opacity: Optional[float] = 0.8, + zoom_start: float = 12, + radius: float = 8, + min_opacity: float = 0.5, + max_opacity: float = 0.8, base_map: Optional[Map] = None, - tile: Optional[Text] = TILES[0], - save_as_html: Optional[bool] = False, - filename: Optional[Text] = 'heatmap_time.html', + tile: Text = TILES[0], + save_as_html: bool = False, + filename: Text = 'heatmap_time.html', ) -> Map: """ Generate visualization of Heat Map using folium plugin. 
@@ -329,11 +329,11 @@ def cluster( n_rows: Optional[int] = None, lat_origin: Optional[float] = None, lon_origin: Optional[float] = None, - zoom_start: Optional[float] = 12, + zoom_start: float = 12, base_map: Optional[Map] = None, - tile: Optional[Text] = TILES[0], - save_as_html: Optional[bool] = False, - filename: Optional[Text] = 'cluster.html', + tile: Text = TILES[0], + save_as_html: bool = False, + filename: Text = 'cluster.html', ) -> Map: """ Generate visualization of Heat Map using folium plugin. @@ -407,11 +407,11 @@ def faster_cluster( n_rows: Optional[int] = None, lat_origin: Optional[float] = None, lon_origin: Optional[float] = None, - zoom_start: Optional[float] = 12, + zoom_start: float = 12, base_map: Optional[Map] = None, - tile: Optional[Text] = TILES[0], - save_as_html: Optional[bool] = False, - filename: Optional[Text] = 'faster_cluster.html', + tile: Text = TILES[0], + save_as_html: bool = False, + filename: Text = 'faster_cluster.html', ) -> Map: """ Generate visualization of Heat Map using folium plugin. @@ -480,11 +480,11 @@ def plot_markers( n_rows: Optional[int] = None, lat_origin: Optional[float] = None, lon_origin: Optional[float] = None, - zoom_start: Optional[float] = 12, + zoom_start: float = 12, base_map: Optional[Map] = None, - tile: Optional[Text] = TILES[0], - save_as_html: Optional[bool] = False, - filename: Optional[Text] = 'markers.html', + tile: Text = TILES[0], + save_as_html: bool = False, + filename: Text = 'markers.html', ) -> Map: """ Generate visualization of Heat Map using folium plugin. @@ -729,9 +729,9 @@ def _add_trajectories_to_folium_map( move_data: DataFrame, items: Tuple, base_map: Map, - legend: Optional[bool] = True, - save_as_html: Optional[bool] = True, - filename: Optional[Text] = 'map.html', + legend: bool = True, + save_as_html: bool = True, + filename: Text = 'map.html', ): """ Adds a trajectory to a folium map with begin and end markers. 
@@ -773,14 +773,14 @@ def plot_trajectories_with_folium( n_rows: Optional[int] = None, lat_origin: Optional[float] = None, lon_origin: Optional[float] = None, - zoom_start: Optional[float] = 12, - legend: Optional[bool] = True, + zoom_start: float = 12, + legend: bool = True, base_map: Optional[Map] = None, - tile: Optional[Text] = TILES[0], - save_as_html: Optional[bool] = False, + tile: Text = TILES[0], + save_as_html: bool = False, color: Optional[Union[Text, List[Text]]] = None, color_by_id: Optional[Dict] = None, - filename: Optional[Text] = 'plot_trajectories_with_folium.html', + filename: Text = 'plot_trajectories_with_folium.html', ) -> Map: """ Generate visualization of all trajectories with folium. @@ -851,13 +851,13 @@ def plot_trajectory_by_id_folium( n_rows: Optional[int] = None, lat_origin: Optional[float] = None, lon_origin: Optional[float] = None, - zoom_start: Optional[float] = 12, - legend: Optional[bool] = True, + zoom_start: float = 12, + legend: bool = True, base_map: Optional[Map] = None, - tile: Optional[Text] = TILES[0], - save_as_html: Optional[bool] = False, + tile: Text = TILES[0], + save_as_html: bool = False, color: Optional[Union[Text, List[Text]]] = None, - filename: Optional[Text] = 'plot_trajectories_with_folium.html', + filename: Text = 'plot_trajectories_with_folium.html', ) -> Map: """ Generate visualization of all trajectories with folium. 
@@ -930,14 +930,14 @@ def plot_trajectory_by_period( n_rows: Optional[int] = None, lat_origin: Optional[float] = None, lon_origin: Optional[float] = None, - zoom_start: Optional[float] = 12, - legend: Optional[bool] = True, + zoom_start: float = 12, + legend: bool = True, base_map: Optional[Map] = None, - tile: Optional[Text] = TILES[0], - save_as_html: Optional[bool] = False, + tile: Text = TILES[0], + save_as_html: bool = False, color: Optional[Union[Text, List[Text]]] = None, color_by_id: Optional[Dict] = None, - filename: Optional[Text] = 'plot_trajectories_by_period.html', + filename: Text = 'plot_trajectories_by_period.html', ) -> Map: """ Generate visualization of all trajectories with folium. @@ -1021,14 +1021,14 @@ def plot_trajectory_by_day_week( n_rows: Optional[int] = None, lat_origin: Optional[float] = None, lon_origin: Optional[float] = None, - zoom_start: Optional[float] = 12, - legend: Optional[bool] = True, + zoom_start: float = 12, + legend: bool = True, base_map: Optional[Map] = None, - tile: Optional[Text] = TILES[0], - save_as_html: Optional[bool] = False, + tile: Text = TILES[0], + save_as_html: bool = False, color: Optional[Union[Text, List[Text]]] = None, color_by_id: Optional[Dict] = None, - filename: Optional[Text] = 'plot_trajectories_by_day_week.html', + filename: Text = 'plot_trajectories_by_day_week.html', ) -> Map: """ Generate visualization of all trajectories with folium. 
@@ -1113,14 +1113,14 @@ def plot_trajectory_by_date( n_rows: Optional[int] = None, lat_origin: Optional[float] = None, lon_origin: Optional[float] = None, - zoom_start: Optional[float] = 12, - legend: Optional[bool] = True, + zoom_start: float = 12, + legend: bool = True, base_map: Optional[Map] = None, - tile: Optional[Text] = TILES[0], - save_as_html: Optional[bool] = False, + tile: Text = TILES[0], + save_as_html: bool = False, color: Optional[Union[Text, List[Text]]] = None, color_by_id: Optional[Dict] = None, - filename: Optional[Text] = 'plot_trajectories_by_date.html', + filename: Text = 'plot_trajectories_by_date.html', ) -> Map: """ Generate visualization of all trajectories with folium. @@ -1213,14 +1213,14 @@ def plot_trajectory_by_hour( n_rows: Optional[int] = None, lat_origin: Optional[float] = None, lon_origin: Optional[float] = None, - zoom_start: Optional[float] = 12, - legend: Optional[bool] = True, + zoom_start: float = 12, + legend: bool = True, base_map: Optional[Map] = None, - tile: Optional[Text] = TILES[0], - save_as_html: Optional[bool] = False, + tile: Text = TILES[0], + save_as_html: bool = False, color: Optional[Union[Text, List[Text]]] = None, color_by_id: Optional[Dict] = None, - filename: Optional[Text] = 'plot_trajectories_by_hour.html', + filename: Text = 'plot_trajectories_by_hour.html', ) -> Map: """ Generate visualization of all trajectories with folium. 
@@ -1301,19 +1301,19 @@ def plot_trajectory_by_hour( def plot_stops( move_data: DataFrame, - radius: Optional[float] = 0, - weight: Optional[float] = 3, + radius: float = 0, + weight: float = 3, id_: Optional[int] = None, n_rows: Optional[int] = None, lat_origin: Optional[float] = None, lon_origin: Optional[float] = None, - zoom_start: Optional[float] = 12, - legend: Optional[bool] = True, + zoom_start: float = 12, + legend: bool = True, base_map: Optional[Map] = None, - tile: Optional[Text] = TILES[0], - save_as_html: Optional[bool] = False, + tile: Text = TILES[0], + save_as_html: bool = False, color: Optional[Union[Text, List[Text]]] = None, - filename: Optional[Text] = 'plot_stops.html', + filename: Text = 'plot_stops.html', ) -> Map: """ Generate visualization of all trajectories with folium. @@ -1411,10 +1411,10 @@ def plot_stops( def plot_bbox( bbox_tuple: Tuple[float, float, float, float], base_map: Optional[Map] = None, - tiles: Optional[Text] = TILES[0], - color: Optional[Text] = 'red', - save_as_html: Optional[bool] = False, - filename: Optional[Text] = 'bbox.html' + tiles: Text = TILES[0], + color: Text = 'red', + save_as_html: bool = False, + filename: Text = 'bbox.html' ) -> Map: """ Plots a bbox using Folium. @@ -1528,15 +1528,15 @@ def _circle_maker( def plot_points_folium( move_data: DataFrame, - user_lat: Optional[Text] = LATITUDE, - user_lon: Optional[Text] = LONGITUDE, - user_point: Optional[Text] = USER_POINT, - radius: Optional[float] = 2, + user_lat: Text = LATITUDE, + user_lon: Text = LONGITUDE, + user_point: Text = USER_POINT, + radius: float = 2, base_map: Optional[Map] = None, slice_tags: Optional[List] = None, - tiles: Optional[Text] = TILES[0], - save_as_html: Optional[bool] = False, - filename: Optional[Text] = 'points.html' + tiles: Text = TILES[0], + save_as_html: bool = False, + filename: Text = 'points.html' ) -> Map: """ Generates a folium map with the trajectories plots and a point. 
@@ -1614,8 +1614,8 @@ def plot_poi_folium( base_map=None, slice_tags=None, tiles=TILES[0], - save_as_html: Optional[bool] = False, - filename: Optional[Text] = 'pois.html' + save_as_html: bool = False, + filename: Text = 'pois.html' ) -> Map: """ Receives a MoveDataFrame and returns a folium map with poi points. @@ -1671,8 +1671,8 @@ def plot_event_folium( base_map=None, slice_tags=None, tiles=TILES[0], - save_as_html: Optional[bool] = False, - filename: Optional[Text] = 'events.html' + save_as_html: bool = False, + filename: Text = 'events.html' ) -> Map: """ Receives a MoveDataFrame and returns a folium map with events. @@ -1722,17 +1722,17 @@ def show_trajs_with_event( df_event: DataFrame, window_time_event: float, radius: float, - event_lat: Optional[Text] = LATITUDE, - event_lon: Optional[Text] = LONGITUDE, - event_datetime: Optional[Text] = DATETIME, - user_lat: Optional[Text] = LATITUDE, - user_lon: Optional[Text] = LONGITUDE, - user_datetime: Optional[Text] = DATETIME, - event_id: Optional[Text] = EVENT_ID, - event_point: Optional[Text] = EVENT_POINT, - user_id: Optional[Text] = UID, - user_point: Optional[Text] = USER_POINT, - line_color: Optional[Text] = LINE_COLOR, + event_lat: Text = LATITUDE, + event_lon: Text = LONGITUDE, + event_datetime: Text = DATETIME, + user_lat: Text = LATITUDE, + user_lon: Text = LONGITUDE, + user_datetime: Text = DATETIME, + event_id: Text = EVENT_ID, + event_point: Text = EVENT_POINT, + user_id: Text = UID, + user_point: Text = USER_POINT, + line_color: Text = LINE_COLOR, slice_event_show: Optional[int] = None, slice_subject_show: Optional[int] = None, ) -> List[Map]: @@ -1904,17 +1904,17 @@ def show_traj_id_with_event( window_time_event: float, radius: float, subject_id: int, - event_lat: Optional[Text] = LATITUDE, - event_lon: Optional[Text] = LONGITUDE, - event_datetime: Optional[Text] = DATETIME, - user_lat: Optional[Text] = LATITUDE, - user_lon: Optional[Text] = LONGITUDE, - user_datetime: Optional[Text] = DATETIME, - 
event_id: Optional[Text] = EVENT_ID, - event_point: Optional[Text] = EVENT_POINT, - user_id: Optional[Text] = UID, - user_point: Optional[Text] = USER_POINT, - line_color: Optional[Text] = LINE_COLOR, + event_lat: Text = LATITUDE, + event_lon: Text = LONGITUDE, + event_datetime: Text = DATETIME, + user_lat: Text = LATITUDE, + user_lon: Text = LONGITUDE, + user_datetime: Text = DATETIME, + event_id: Text = EVENT_ID, + event_point: Text = EVENT_POINT, + user_id: Text = UID, + user_point: Text = USER_POINT, + line_color: Text = LINE_COLOR, slice_event_show: Optional[int] = None, slice_subject_show: Optional[int] = None, ) -> Map: @@ -1991,9 +1991,9 @@ def show_traj_id_with_event( def _create_geojson_features_line( move_data: DataFrame, - label_lat: Optional[Text] = LATITUDE, - label_lon: Optional[Text] = LONGITUDE, - label_datetime: Optional[Text] = DATETIME + label_lat: Text = LATITUDE, + label_lon: Text = LONGITUDE, + label_datetime: Text = DATETIME ) -> List: """ Create geojson features. @@ -2063,8 +2063,8 @@ def plot_traj_timestamp_geo_json( label_lon=LONGITUDE, label_datetime=DATETIME, tiles=TILES[0], - save_as_html: Optional[bool] = False, - filename: Optional[Text] = 'events.html' + save_as_html: bool = False, + filename: Text = 'events.html' ) -> Map: """ Plot trajectories wit geo_json. 
diff --git a/pymove/visualization/matplotlib.py b/pymove/visualization/matplotlib.py index 60559fb0..a2105cdb 100644 --- a/pymove/visualization/matplotlib.py +++ b/pymove/visualization/matplotlib.py @@ -37,12 +37,12 @@ def show_object_id_by_date( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - create_features: Optional[bool] = True, + create_features: bool = True, kind: Optional[List] = None, - figsize: Optional[Tuple[float, float]] = (21, 9), - return_fig: Optional[bool] = True, - save_fig: Optional[bool] = True, - name: Optional[Text] = 'shot_points_by_date.png', + figsize: Tuple[float, float] = (21, 9), + return_fig: bool = True, + save_fig: bool = True, + name: Text = 'shot_points_by_date.png', ) -> Optional[figure]: """ Generates four visualizations based on datetime feature. @@ -120,12 +120,12 @@ def show_object_id_by_date( def plot_trajectories( move_data: DataFrame, - markers: Optional[Text] = 'o', - markersize: Optional[float] = 12, - figsize: Optional[Tuple[float, float]] = (10, 10), - return_fig: Optional[bool] = True, - save_fig: Optional[bool] = True, - name: Optional[Text] = 'trajectories.png', + markers: Text = 'o', + markersize: float = 12, + figsize: Tuple[float, float] = (10, 10), + return_fig: bool = True, + save_fig: bool = True, + name: Text = 'trajectories.png', ) -> Optional[figure]: """ Generate a visualization that show trajectories. 
@@ -174,14 +174,14 @@ def plot_trajectories( def plot_traj_by_id( move_data: DataFrame, id_: Union[int, Text], - label: Optional[Text] = TID, + label: Text = TID, feature: Optional[Text] = None, value: Optional[Any] = None, - linewidth: Optional[float] = 3, - markersize: Optional[float] = 20, - figsize: Optional[Tuple[float, float]] = (10, 10), - return_fig: Optional[bool] = True, - save_fig: Optional[bool] = True, + linewidth: float = 3, + markersize: float = 20, + figsize: Tuple[float, float] = (10, 10), + return_fig: bool = True, + save_fig: bool = True, name: Optional[Text] = None, ) -> Optional[figure]: """ @@ -272,11 +272,11 @@ def plot_traj_by_id( def plot_all_features( move_data: DataFrame, - dtype: Optional[Callable] = float, - figsize: Optional[Tuple[float, float]] = (21, 15), - return_fig: Optional[bool] = True, - save_fig: Optional[bool] = True, - name: Optional[Text] = 'features.png', + dtype: Callable = float, + figsize: Tuple[float, float] = (21, 15), + return_fig: bool = True, + save_fig: bool = True, + name: Text = 'features.png', ) -> Optional[figure]: """ Generate a visualization for each columns that type is equal dtype. @@ -326,7 +326,7 @@ def plot_all_features( return fig -def plot_coords(ax: axes, ob: BaseGeometry, color: Optional[Text] = 'r'): +def plot_coords(ax: axes, ob: BaseGeometry, color: Text = 'r'): """ Plot the coordinates of each point of the object in a 2D chart. @@ -370,11 +370,11 @@ def plot_bounds(ax: axes, ob: Union[LineString, MultiLineString], color='b'): def plot_line( ax: axes, ob: LineString, - color: Optional[Text] = 'r', - alpha: Optional[float] = 0.7, - linewidth: Optional[float] = 3, - solid_capstyle: Optional[Text] = 'round', - zorder: Optional[float] = 2 + color: Text = 'r', + alpha: float = 0.7, + linewidth: float = 3, + solid_capstyle: Text = 'round', + zorder: float = 2 ): """ Plot a LineString. 
diff --git a/setup.cfg b/setup.cfg index 64dc7c24..7b4e4f12 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,6 +6,10 @@ select = B,C,E,D,N,F,W exclude = pymove/core/interface.py, pymove/tests/* docstring-convention = numpy +[mypy] +ignore_missing_imports = True +no_warn_no_return = True + [isort] multi_line_output = 3 include_trailing_comma = True From 39c884221326e99087436741bba0cee9a0a5efea Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 8 Jun 2021 16:41:28 -0300 Subject: [PATCH 17/56] fix mypy errors --- .pre-commit-config.yaml | 8 +- Makefile | 7 +- pymove/preprocessing/filters.py | 10 +-- pymove/utils/visual.py | 4 +- pymove/visualization/folium.py | 127 +++++++++++++++++--------------- 5 files changed, 81 insertions(+), 75 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 34dc62dd..a9b80b57 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,7 +32,7 @@ repos: 'flake8-docstrings==1.6.0', 'pep8-naming==0.11.1' ] -# - repo: https://github.com/pre-commit/mirrors-mypy -# rev: 'v0.812' -# hooks: -# - id: mypy +- repo: https://github.com/pre-commit/mirrors-mypy + rev: 'v0.812' + hooks: + - id: mypy diff --git a/Makefile b/Makefile index 5904aeb8..d0059a0f 100644 --- a/Makefile +++ b/Makefile @@ -19,14 +19,15 @@ clean: rm -f .coverage test: clean - pytest + pytest pymove coverage: clean - coverage run -m pytest + coverage run -m pytest pymove coverage report lint: clean - flake8 + flake8 pymove + mypy pymove docs: clean cp docs/examples/notebooks.rst docs diff --git a/pymove/preprocessing/filters.py b/pymove/preprocessing/filters.py index 0f2c9d28..052881d4 100644 --- a/pymove/preprocessing/filters.py +++ b/pymove/preprocessing/filters.py @@ -77,7 +77,7 @@ def get_bbox_by_radius( lonmin = lon - delta_lon lonmax = lon + delta_lon - return tuple(np.rad2deg([latmin, lonmin, latmax, lonmax])) + return tuple(np.rad2deg([latmin, lonmin, latmax, lonmax])) # type: ignore def by_bbox( @@ -378,7 +378,7 @@ def 
_filter_speed_max_radius(move_data: DataFrame, **kwargs): return move_data[filter_] -def _filter_data(move_data: DataFrame, f: callable, kwargs: Dict): +def _filter_data(move_data: DataFrame, f: Callable, kwargs: Dict): """ Filter the dataframe using condition from given function. @@ -420,7 +420,7 @@ def _filter_data(move_data: DataFrame, f: callable, kwargs: Dict): return filter_data_points, rows_to_drop -def _clean_gps(move_data: DataFrame, f: callable, **kwargs): +def _clean_gps(move_data: DataFrame, f: Callable, **kwargs): """ Cleans gps points from a dataframe using condition from given function. @@ -470,7 +470,7 @@ def clean_gps_jumps_by_distance( label_id: Text = TRAJ_ID, jump_coefficient: float = 3.0, threshold: float = 1, - label_dtype: callable = np.float64, + label_dtype: Callable = np.float64, inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ @@ -527,7 +527,7 @@ def clean_gps_nearby_points_by_distances( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], label_id: Text = TRAJ_ID, radius_area: float = 10.0, - label_dtype: callable = np.float64, + label_dtype: Callable = np.float64, inplace: bool = False, ) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: """ diff --git a/pymove/utils/visual.py b/pymove/utils/visual.py index 2355b7ab..80d0a36f 100644 --- a/pymove/utils/visual.py +++ b/pymove/utils/visual.py @@ -11,7 +11,7 @@ """ -from typing import List, Text, Tuple +from typing import Sequence, Text, Tuple, Union from branca.element import MacroElement, Template from folium import Map @@ -23,7 +23,7 @@ from pymove.utils.constants import COLORS, LATITUDE, LONGITUDE, TRAJ_ID -def add_map_legend(m: Map, title: Text, items: List[Tuple]): +def add_map_legend(m: Map, title: Text, items: Union[Tuple, Sequence[Tuple]]): """ Adds a legend for a folium map. 
diff --git a/pymove/visualization/folium.py b/pymove/visualization/folium.py index 35560a6f..99b3a01e 100644 --- a/pymove/visualization/folium.py +++ b/pymove/visualization/folium.py @@ -25,7 +25,8 @@ """ -from typing import Any, Dict, List, Optional, Text, Tuple, Union +from datetime import date +from typing import Any, Dict, List, Optional, Sequence, Text, Tuple, Union import folium import numpy as np @@ -567,9 +568,9 @@ def _filter_and_generate_colors( move_data: DataFrame, id_: Optional[int] = None, n_rows: Optional[int] = None, - color: Optional[Text] = None, + color: Optional[Union[Text, List[Text]]] = None, color_by_id: Optional[Dict] = None -) -> Tuple[DataFrame, List[Tuple]]: +) -> Tuple[DataFrame, List[Tuple[Any, Any]]]: """ Filters the dataframe and generate colors for folium map. @@ -599,13 +600,13 @@ def _filter_and_generate_colors( n_rows = move_data.shape[0] if id_ is not None: - mv_df = move_data[move_data[TRAJ_ID] == id_].iloc[:n_rows][ + mv_df = move_data[move_data[TRAJ_ID] == id_].head(n_rows)[ [LATITUDE, LONGITUDE, DATETIME, TRAJ_ID] ] if not len(mv_df): raise IndexError('No user with id %s in dataframe' % id_) else: - mv_df = move_data.iloc[:n_rows][ + mv_df = move_data.head(n_rows)[ [LATITUDE, LONGITUDE, DATETIME, TRAJ_ID] ] @@ -727,7 +728,7 @@ def _add_begin_end_markers_to_folium_map( def _add_trajectories_to_folium_map( move_data: DataFrame, - items: Tuple, + items: Sequence[Tuple], base_map: Map, legend: bool = True, save_as_html: bool = True, @@ -1107,8 +1108,8 @@ def plot_trajectory_by_day_week( def plot_trajectory_by_date( move_data: DataFrame, - start_date: Text, - end_date: Text, + start_date: Union[Text, date], + end_date: Union[Text, date], id_: Optional[int] = None, n_rows: Optional[int] = None, lat_origin: Optional[float] = None, @@ -1463,7 +1464,7 @@ def plot_bbox( return base_map -def _format_tags(line, slice_): +def _format_tags(line: Union[List, Dict], slice_: List) -> Text: """ Create or format tags. 
@@ -1484,13 +1485,13 @@ def _format_tags(line, slice_): def _circle_maker( - iter_tuple, - user_lat, - user_lon, - slice_tags, - user_point, - radius, - map_ + iter_tuple: DataFrame, + user_lat: Text, + user_lon: Text, + slice_tags: List, + user_point: Text, + radius: float, + map_: Map ): """ Return a circle. @@ -1502,7 +1503,7 @@ def _circle_maker( Latitude column name. user_lon: str. Longitude column name. - slice_tags: + slice_tags: list or iterable user_point: str. Point color. @@ -1571,8 +1572,8 @@ def plot_points_folium( Map A folium map """ - if not slice_tags: - slice_tags = move_data.columns + if slice_tags is None: + slice_tags = list(move_data.columns) # If not have a map a map is create with mean to lat and lon if not base_map: @@ -1585,20 +1586,16 @@ def plot_points_folium( tile=tiles ) - list( - map( - lambda x: _circle_maker( - x, - user_lat, - user_lon, - slice_tags, - user_point, - radius, - base_map - ), - move_data.iterrows() + for row in move_data.iterrows(): + _circle_maker( + row, + user_lat, + user_lon, + slice_tags, + user_point, + radius, + base_map ) - ) if save_as_html: base_map.save(outfile=filename) @@ -1606,14 +1603,14 @@ def plot_points_folium( def plot_poi_folium( - move_data, - poi_lat=LATITUDE, - poi_lon=LONGITUDE, - poi_point=POI_POINT, - radius=2, - base_map=None, - slice_tags=None, - tiles=TILES[0], + move_data: DataFrame, + poi_lat: Text = LATITUDE, + poi_lon: Text = LONGITUDE, + poi_point: Text = POI_POINT, + radius: float = 2, + base_map: Optional[Map] = None, + slice_tags: Optional[List] = None, + tiles: Text = TILES[0], save_as_html: bool = False, filename: Text = 'pois.html' ) -> Map: @@ -1663,14 +1660,14 @@ def plot_poi_folium( def plot_event_folium( - move_data, - event_lat=LATITUDE, - event_lon=LONGITUDE, - event_point=EVENT_POINT, - radius=2, - base_map=None, - slice_tags=None, - tiles=TILES[0], + move_data: DataFrame, + event_lat: Text = LATITUDE, + event_lon: Text = LONGITUDE, + event_point: Text = EVENT_POINT, + 
radius: float = 2, + base_map: Optional[Map] = None, + slice_tags: Optional[List] = None, + tiles: Text = TILES[0], save_as_html: bool = False, filename: Text = 'events.html' ) -> Map: @@ -1733,8 +1730,8 @@ def show_trajs_with_event( user_id: Text = UID, user_point: Text = USER_POINT, line_color: Text = LINE_COLOR, - slice_event_show: Optional[int] = None, - slice_subject_show: Optional[int] = None, + slice_event_show: Optional[List] = None, + slice_subject_show: Optional[List] = None, ) -> List[Map]: """ Plot a trajectory, including your user_points lat lon and your tags. @@ -1771,15 +1768,20 @@ def show_trajs_with_event( User point color, by default USER_POINT. line_color: str, optional Line color, by default 'blue'. - slice_event_show: int, optional + slice_event_show: list, optional by default None. - slice_subject_show: int, optional + slice_subject_show: list, optional by default None. Returns ------- list of Map A list of folium maps. + + Raises + ------ + ValueError + If feature generation fails """ # building structure for deltas delta_event = pd.to_timedelta(window_time_event, unit='s') @@ -1814,6 +1816,9 @@ def show_trajs_with_event( end_datetime=end_time ) + if df_filtered is None: + raise ValueError('Filter datetime failed!') + # length of df_temp len_df_temp = df_filtered.shape[0] @@ -1915,8 +1920,8 @@ def show_traj_id_with_event( user_id: Text = UID, user_point: Text = USER_POINT, line_color: Text = LINE_COLOR, - slice_event_show: Optional[int] = None, - slice_subject_show: Optional[int] = None, + slice_event_show: Optional[List] = None, + slice_subject_show: Optional[List] = None, ) -> Map: """ Plot a trajectory, including your user_points lat lon and your tags. @@ -1955,9 +1960,9 @@ def show_traj_id_with_event( User point color, by default USER_POINT. line_color: str, optional Line color, by default 'blue'. - slice_event_show: int, optional + slice_event_show: list, optional by default None. 
- slice_subject_show: int, optional + slice_subject_show: list, optional by default None. Returns @@ -2058,11 +2063,11 @@ def _create_geojson_features_line( def plot_traj_timestamp_geo_json( - move_data, - label_lat=LATITUDE, - label_lon=LONGITUDE, - label_datetime=DATETIME, - tiles=TILES[0], + move_data: DataFrame, + label_lat: Text = LATITUDE, + label_lon: Text = LONGITUDE, + label_datetime: Text = DATETIME, + tiles: Text = TILES[0], save_as_html: bool = False, filename: Text = 'events.html' ) -> Map: From a434c15c5a6892f736f701c36cb79d911c0aa6a1 Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 8 Jun 2021 16:44:13 -0300 Subject: [PATCH 18/56] added mypy to requirements-dev --- requirements-dev.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-dev.txt b/requirements-dev.txt index 853862e7..2969b829 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,6 +4,7 @@ flake8 flake8-bugbear flake8-docstrings geopandas +mypy pep8-naming pre-commit pytest From 359467de4ca5335a32737082aab248411e16833b Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 8 Jun 2021 16:53:21 -0300 Subject: [PATCH 19/56] pinned mypy version to requirements-dev --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 2969b829..dd74c405 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,7 +4,7 @@ flake8 flake8-bugbear flake8-docstrings geopandas -mypy +mypy==0.812 pep8-naming pre-commit pytest From fbc52a6a8859f7ddf4a052095a22d3648b4b818c Mon Sep 17 00:00:00 2001 From: Regis Pires Magalhaes Date: Tue, 8 Jun 2021 17:51:55 -0300 Subject: [PATCH 20/56] Update README.md --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ff5994fe..0fc86dea 100644 --- a/README.md +++ b/README.md @@ -255,12 +255,16 @@ You can see examples of how to use PyMove [here](https://github.com/InsightLab/P - `by_max_time` - 
`by_max_speed` -- 7: **Distance of Trajectory** → `pymove.query.query` +- 7: **Distance Measures** → `pymove.distance` + - `medp + - `medt` + - `euclidean_distance_in_meters` + - `haversine` + +- 8: **Query Historical Trajectories** → `pymove.query.query` - `range_query` - `knn_query` -- 8: **Query Historical Trajectories** - - 9: **Managing Recent Trajectories** - 10: **Privacy Preserving** From ffbcd964688d01634a44db78cdb79344f3ba2d0c Mon Sep 17 00:00:00 2001 From: Regis Pires Magalhaes Date: Tue, 8 Jun 2021 17:52:35 -0300 Subject: [PATCH 21/56] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0fc86dea..954f4bcb 100644 --- a/README.md +++ b/README.md @@ -255,8 +255,8 @@ You can see examples of how to use PyMove [here](https://github.com/InsightLab/P - `by_max_time` - `by_max_speed` -- 7: **Distance Measures** → `pymove.distance` - - `medp +- 7: **Distance Measures** → `pymove.distances` + - `medp` - `medt` - `euclidean_distance_in_meters` - `haversine` From ccdb086569019ca84399846bda09eddecb986f56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor?= Date: Wed, 9 Jun 2021 22:58:40 -0300 Subject: [PATCH 22/56] putting examples on integration module --- pymove/utils/integration.py | 574 +++++++++++++++++++++++++++++++++++- 1 file changed, 571 insertions(+), 3 deletions(-) diff --git a/pymove/utils/integration.py b/pymove/utils/integration.py index fd320af9..dadf74c9 100644 --- a/pymove/utils/integration.py +++ b/pymove/utils/integration.py @@ -62,6 +62,33 @@ def union_poi_bank(data: DataFrame, label_poi: Optional[Text] = TYPE_POI): label_poi : str, optional Label referring to the Point of Interest category, by default TYPE_POI + Examples + -------- + >>> from pymove.utils.integration import union_poi_bank + >>> pois_df + lat lon id type_poi + 0 39.984094 116.319236 1 bank + 1 39.984198 116.319322 2 randomvalue + 2 39.984224 116.319402 3 bancos_postos + 3 39.984211 116.319389 4 
randomvalue + 4 39.984217 116.319422 5 bancos_PAE + 5 39.984710 116.319865 6 bancos_postos + 6 39.984674 116.319810 7 bancos_agencias + 7 39.984623 116.319773 8 bancos_filiais + 8 39.984606 116.319732 9 banks + 9 39.984555 116.319728 10 banks + >>> union_poi_bank(pois_df) + lat lon id type_poi + 0 39.984094 116.319236 1 banks + 1 39.984198 116.319322 2 randomvalue + 2 39.984224 116.319402 3 banks + 3 39.984211 116.319389 4 randomvalue + 4 39.984217 116.319422 5 banks + 5 39.984710 116.319865 6 banks + 6 39.984674 116.319810 7 banks + 7 39.984623 116.319773 8 banks + 8 39.984606 116.319732 9 banks + 9 39.984555 116.319728 10 banks """ logger.debug('union bank categories to one category') logger.debug('... There are {} -- {}'.format(data[label_poi].nunique(), label_poi)) @@ -89,6 +116,30 @@ def union_poi_bus_station(data: DataFrame, label_poi: Optional[Text] = TYPE_POI) label_poi : str, optional Label referring to the Point of Interest category, by default TYPE_POI + Examples + -------- + >>> from pymove.utils.integration import union_poi_bus_station + >>> pois_df + lat lon id type_poi + 0 39.984094 116.319236 1 transit_station + 1 39.984198 116.319322 2 randomvalue + 2 39.984224 116.319402 3 transit_station + 3 39.984211 116.319389 4 pontos_de_onibus + 4 39.984217 116.319422 5 transit_station + 5 39.984710 116.319865 6 randomvalue + 6 39.984674 116.319810 7 bus_station + 7 39.984623 116.319773 8 bus_station + >>> union_poi_bus_station(pois_df) + >>> pois_df + lat lon id type_poi + 0 39.984094 116.319236 1 bus_station + 1 39.984198 116.319322 2 randomvalue + 2 39.984224 116.319402 3 bus_station + 3 39.984211 116.319389 4 bus_station + 4 39.984217 116.319422 5 bus_station + 5 39.984710 116.319865 6 randomvalue + 6 39.984674 116.319810 7 bus_station + 7 39.984623 116.319773 8 bus_station """ logger.debug('union bus station categories to one category') filter_bus_station = data[label_poi].isin( @@ -110,6 +161,30 @@ def union_poi_bar_restaurant(data: DataFrame, 
label_poi: Optional[Text] = TYPE_P label_poi : str, optional Label referring to the Point of Interest category, by default TYPE_POI + Examples + -------- + >>> from pymove.utils.integration import union_poi_bar_restaurant + >>> pois_df + lat lon id type_poi + 0 39.984094 116.319236 1 restaurant + 1 39.984198 116.319322 2 restaurant + 2 39.984224 116.319402 3 randomvalue + 3 39.984211 116.319389 4 bar + 4 39.984217 116.319422 5 bar + 5 39.984710 116.319865 6 bar-restaurant + 6 39.984674 116.319810 7 random123 + 7 39.984623 116.319773 8 123 + >>> union_poi_bar_restaurant(pois_df) + >>> pois_df + lat lon id type_poi + 0 39.984094 116.319236 1 bar-restaurant + 1 39.984198 116.319322 2 bar-restaurant + 2 39.984224 116.319402 3 randomvalue + 3 39.984211 116.319389 4 bar-restaurant + 4 39.984217 116.319422 5 bar-restaurant + 5 39.984710 116.319865 6 bar-restaurant + 6 39.984674 116.319810 7 random123 + 7 39.984623 116.319773 8 123 """ logger.debug('union restaurant and bar categories to one category') filter_bar_restaurant = data[label_poi].isin(['restaurant', 'bar']) @@ -129,6 +204,29 @@ def union_poi_parks(data: DataFrame, label_poi: Optional[Text] = TYPE_POI): label_poi : str, optional Label referring to the Point of Interest category, by default TYPE_POI + Examples + -------- + >>> from pymove.utils.integration import union_poi_parks + >>> pois_df + lat lon id type_poi + 0 39.984094 116.319236 1 pracas_e_parques + 1 39.984198 116.319322 2 park + 2 39.984224 116.319402 3 parks + 3 39.984211 116.319389 4 random + 4 39.984217 116.319422 5 123 + 5 39.984710 116.319865 6 park + 6 39.984674 116.319810 7 parks + 7 39.984623 116.319773 8 pracas_e_parques + >>> union_poi_parks(pois_df) + lat lon id type_poi + 0 39.984094 116.319236 1 parks + 1 39.984198 116.319322 2 parks + 2 39.984224 116.319402 3 parks + 3 39.984211 116.319389 4 random + 4 39.984217 116.319422 5 123 + 5 39.984710 116.319865 6 parks + 6 39.984674 116.319810 7 parks + 7 39.984623 116.319773 8 parks """ 
logger.debug('union parks categories to one category') filter_parks = data[label_poi].isin(['pracas_e_parques', 'park']) @@ -148,6 +246,30 @@ def union_poi_police(data: DataFrame, label_poi: Optional[Text] = TYPE_POI): label_poi : str, optional Label referring to the Point of Interest category, by default TYPE_POI + Examples + -------- + >>> from pymove.utils.integration import union_poi_police + >>> pois_df + lat lon id type_poi + 0 39.984094 116.319236 1 distritos_policiais + 1 39.984198 116.319322 2 police + 2 39.984224 116.319402 3 police + 3 39.984211 116.319389 4 distritos_policiais + 4 39.984217 116.319422 5 random + 5 39.984710 116.319865 6 randomvalue + 6 39.984674 116.319810 7 123 + 7 39.984623 116.319773 8 bus_station + >>> union_poi_police(pois_df) + >>> pois_df + lat lon id type_poi + 0 39.984094 116.319236 1 police + 1 39.984198 116.319322 2 police + 2 39.984224 116.319402 3 police + 3 39.984211 116.319389 4 police + 4 39.984217 116.319422 5 random + 5 39.984710 116.319865 6 randomvalue + 6 39.984674 116.319810 7 123 + 7 39.984623 116.319773 8 bus_station """ logger.debug('union distritos policies and police categories') filter_police = data[label_poi] == 'distritos_policiais' @@ -172,6 +294,40 @@ def join_collective_areas( label_geometry : str, optional Label referring to the Point of Interest category, by default GEOMETRY + Examples + -------- + >>> from pymove.utils.integration import join_collective_areas + >>> gdf.head() + lat lon datetime id geometry + 0 39.984094 116.319236 2008-10-23 05:53:05 1 POINT (116.31924 39.98409) + 1 39.984198 116.319322 2008-10-23 05:53:06 1 POINT (116.31932 39.98420) + 2 39.984224 116.319402 2008-10-23 05:53:11 1 POINT (116.31940 39.98422) + 3 39.984211 116.319389 2008-10-23 05:53:16 1 POINT (116.31939 39.98421) + 4 39.984217 116.319422 2008-10-23 05:53:21 1 POINT (116.31942 39.98422) + >>> area_c + lat lon datetime id geometry\ + 0 39.984094 116.319236 2008-10-23 05:53:05 1\ + POINT (116.31924 39.98409) + 500 
40.006436 116.317701 2008-10-23 10:53:31 1\ + POINT (116.31770 40.00644) + 1000 40.014125 116.306159 2008-10-23 23:43:56 1\ + POINT (116.30616 40.01412) + 1500 39.979009 116.326873 2008-10-24 00:11:29 1\ + POINT (116.32687 39.97901) + >>> join_collective_areas(gdf, area_c) + >>> gdf.head() + lat lon datetime id\ + geometry violating + 0 39.984094 116.319236 2008-10-23 05:53:05 1\ + POINT (116.31924 39.98409) True + 1 39.984198 116.319322 2008-10-23 05:53:06 1\ + POINT (116.31932 39.98420) False + 2 39.984224 116.319402 2008-10-23 05:53:11 1\ + POINT (116.31940 39.98422) False + 3 39.984211 116.319389 2008-10-23 05:53:16 1\ + POINT (116.31939 39.98421) False + 4 39.984217 116.319422 2008-10-23 05:53:21 1\ + POINT (116.31942 39.98422) False """ logger.debug('Integration between trajectories and collectives areas') @@ -214,6 +370,31 @@ def _reset_and_creates_id_and_lat_lon( ------- distances, ids, tags, lat, lon: arrays with default values for join operation + Examples + -------- + >>> from pymove.utils.integration import _reset_and_creates_id_and_lat_lon + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 + 3 40.016238 116.307691 2008-10-23 11:03:06 1 + 4 40.013814 116.306525 2008-10-23 11:58:33 2 + >>> pois.head() + lat lon id type_poi name_poi + 0 39.984094 116.319236 1 policia distrito_pol_1 + 1 39.991013 116.326384 2 policia policia_federal + 2 40.010000 116.312615 3 comercio supermercado_aroldo + 3 40.013821 116.306531 4 show forro_ tropykalia + 4 40.008099 116.317711 5 risca-faca rinha_de_galo_world_cup + >>> _reset_and_creates_id_and_lat_lon(move_df, pois) + (array([inf, inf, inf, inf, inf, inf, inf, inf, inf]), + array(['', '', '', '', '', '', '', '', ''], dtype=object), + array(['', '', '', '', '', '', '', '', ''], dtype=object), + array([inf, inf, inf, inf, inf, inf, inf]), + array([inf, inf, inf, inf, inf, inf, inf])) + >>> 
print(type(_reset_and_creates_id_and_lat_lon(move_df, pois))) + """ if reset_index: logger.debug('... Resetting index to operation...') @@ -264,6 +445,53 @@ def _reset_set_window__and_creates_event_id_type( ------- window_starts, window_ends, current_distances, event_id, event_type + Examples + -------- + >>> from pymove.utils.integration import + _reset_set_window__and_creates_event_id_type + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 + 3 40.016238 116.307691 2008-10-23 11:03:06 1 + 4 40.013814 116.306525 2008-10-23 11:58:33 2 + >>> pois_df + lat lon event_id datetime\ + event_type + 0 39.984094 116.319236 1 2008-10-24 01:57:57\ + show do tropykalia + 1 39.991013 116.326384 2 2008-10-24 00:22:01\ + evento da prefeitura + 2 40.010000 116.312615 3 2008-10-25 00:21:01\ + show do seu joao + 3 40.013821 116.306531 4 2008-10-26 00:22:01\ + missa + >>> _reset_set_window__and_creates_event_id_type(move_df, pois, + 'datetime', 600) + (0 2008-10-23 05:43:05 + 1 2008-10-23 10:27:26 + 2 2008-10-23 10:40:16 + 3 2008-10-23 10:53:06 + 4 2008-10-23 11:48:33 + 5 2008-10-23 23:40:45 + 6 2008-10-23 23:52:14 + 7 2008-10-24 00:12:01 + 8 2008-10-24 01:47:57 + Name: datetime, dtype: datetime64[ns], + 0 2008-10-23 06:03:05 + 1 2008-10-23 10:47:26 + 2 2008-10-23 11:00:16 + 3 2008-10-23 11:13:06 + 4 2008-10-23 12:08:33 + 5 2008-10-24 00:00:45 + 6 2008-10-24 00:12:14 + 7 2008-10-24 00:32:01 + 8 2008-10-24 02:07:57 + Name: datetime, dtype: datetime64[ns], + array([inf, inf, inf, inf, inf, inf, inf, inf, inf]), + array(['', '', '', '', '', '', '', '', ''], dtype=object), + array(['', '', '', '', '', '', '', '', ''], dtype=object)) """ # get a vector with windows time to each point data.reset_index(drop=True, inplace=True) @@ -308,6 +536,54 @@ def _reset_set_window_and_creates_event_id_type_all( ------- window_starts, window_ends, current_distances, 
event_id, event_type arrays with default values for join operation + + Examples + -------- + >>> from pymove.utils.integration import + _reset_set_window_and_creates_event_id_type_all + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 + 3 40.016238 116.307691 2008-10-23 11:03:06 1 + 4 40.013814 116.306525 2008-10-23 11:58:33 2 + >>> pois_df + lat lon event_id datetime\ + event_type + 0 39.984094 116.319236 1 2008-10-24 01:57:57\ + show do tropykalia + 1 39.991013 116.326384 2 2008-10-24 00:22:01\ + evento da prefeitura + 2 40.010000 116.312615 3 2008-10-25 00:21:01\ + show do seu joao + 3 40.013821 116.306531 4 2008-10-26 00:22:01\ + missa + >>> _reset_set_window_and_creates_event_id_type_all(move_df, pois, + 'datetime', 600) + (0 2008-10-23 05:43:05 + 1 2008-10-23 10:27:26 + 2 2008-10-23 10:40:16 + 3 2008-10-23 10:53:06 + 4 2008-10-23 11:48:33 + 5 2008-10-23 23:40:45 + 6 2008-10-23 23:52:14 + 7 2008-10-24 00:12:01 + 8 2008-10-24 01:47:57 + Name: datetime, dtype: datetime64[ns], + 0 2008-10-23 06:03:05 + 1 2008-10-23 10:47:26 + 2 2008-10-23 11:00:16 + 3 2008-10-23 11:13:06 + 4 2008-10-23 12:08:33 + 5 2008-10-24 00:00:45 + 6 2008-10-24 00:12:14 + 7 2008-10-24 00:32:01 + 8 2008-10-24 02:07:57 + Name: datetime, dtype: datetime64[ns], + array([None, None, None, None, None, None, None, None, None], dtype=object), + array([None, None, None, None, None, None, None, None, None], dtype=object), + array([None, None, None, None, None, None, None, None, None], dtype=object)) """ # get a vector with windows time to each point data.reset_index(drop=True, inplace=True) @@ -355,6 +631,45 @@ def join_with_pois( Flag for reset index of the df_pois and data dataframes before the join, by default True + Examples + -------- + >>> from pymove.utils.integration import join_with_pois + >>> POIs.head() + unique_id osmid element_type amenity fee\ + geometry + 
0 node/269492188 269492188 node toilets no\ + POINT (116.26750 39.98087) + 1 node/274942287 274942287 node toilets NaN\ + POINT (116.27358 39.99664) + 2 node/276320137 276320137 node fast_food NaN\ + POINT (116.33756 39.97541) + 3 node/276320142 276320142 node massage NaN\ + POINT (116.33751 39.97546) + 4 node/286242547 286242547 node toilets NaN\ + POINT (116.19982 40.00670) + .... + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + ... + >>> join_with_pois(move_df, POIs, label_id='osmid', label_poi_name='name') + >>> move_df.head() + lat lon datetime id\ + id_poi dist_poi name_poi + 0 39.984094 116.319236 2008-10-23 05:53:05 1\ + 5572452688 116.862844 太平洋影城(中关村店) + 1 39.984198 116.319322 2008-10-23 05:53:06 1\ + 5572452688 119.142692 太平洋影城(中关村店) + 2 39.984224 116.319402 2008-10-23 05:53:11 1\ + 5572452688 116.595117 太平洋影城(中关村店) + 3 39.984211 116.319389 2008-10-23 05:53:16 1\ + 5572452688 116.257378 太平洋影城(中关村店) + 4 39.984217 116.319422 2008-10-23 05:53:21 1\ + 5572452688 114.886759 太平洋影城(中关村店) """ values = _reset_and_creates_id_and_lat_lon(data, df_pois, True, reset_index) current_distances, ids_pois, tag_pois, lat_user, lon_user = values @@ -425,6 +740,35 @@ def join_with_pois_optimizer( Flag for reset index of the df_pois and data dataframes before the join, by default True + Examples + -------- + >>> from pymove.utils.integration import join_with_pois_optimizer + >>> from pymove.utils.integration import join_with_pois + >>> POIs.head() + unique_id osmid element_type amenity fee\ + geometry + 0 node/269492188 269492188 node toilets no\ + POINT (116.26750 39.98087) + 1 node/274942287 274942287 node toilets NaN\ + POINT (116.27358 39.99664) + 2 node/276320137 276320137 node fast_food NaN\ + POINT (116.33756 39.97541) + 3 node/276320142 
276320142 node massage NaN\
+ POINT (116.33751 39.97546)
+ 4 node/286242547 286242547 node toilets NaN\
+ POINT (116.19982 40.00670)
+ ....
+ >>> move_df.head()
+ lat lon datetime id
+ 0 39.984094 116.319236 2008-10-23 05:53:05 1
+ 1 39.984198 116.319322 2008-10-23 05:53:06 1
+ 2 39.984224 116.319402 2008-10-23 05:53:11 1
+ 3 39.984211 116.319389 2008-10-23 05:53:16 1
+ 4 39.984217 116.319422 2008-10-23 05:53:21 1
+ ...
+ >>> join_with_pois_optimizer(move_df, POIs, label_id='osmid',
+ label_poi_name='name', dist_poi=np.array([100,9,1,50,50,10,20]))
+ 'the size of the dist_poi is different from the size of pois' """ if len(df_pois[label_poi_name].unique()) == len(dist_poi): values = _reset_and_creates_id_and_lat_lon(data, df_pois, False, reset_index) @@ -490,14 +834,14 @@ def join_with_pois_by_category( label_id: Optional[Text] = TRAJ_ID ): """ - Performs the integration between trajectories and points of interest. + Performs the integration between trajectories and points of interest. Generating new columns referring to the category and distance from the nearest point of interest that has this category at each point of the trajectory. - Parameters - ---------- + Parameters + ---------- data : DataFrame The input trajectory data. df_pois : DataFrame The input point of interest data. label_category : str, optional Label of df_pois referring to the point of interest category, by default TYPE_POI label_id : str, optional Label of df_pois referring to the point of interest id, by default TRAJ_ID + Examples + -------- + >>> from pymove.utils.integration import join_with_pois_by_category + >>> POIs.head() + unique_id osmid element_type amenity fee/ + geometry + 0 node/269492188 269492188 node toilets no/ + POINT (116.26750 39.98087) + 1 node/274942287 274942287 node toilets NaN/ + POINT (116.27358 39.99664) + 2 node/276320137 276320137 node fast_food NaN/ + POINT (116.33756 39.97541) + 3 node/276320142 276320142 node massage NaN/ + POINT (116.33751 39.97546) + 4 node/286242547 286242547 node toilets NaN/ + POINT (116.19982 40.00670) + ....
+ >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + ... + >>> join_with_pois_by_category(move_df, POIs, + label_category='amenity', label_id='osmid') + >>> move_df.head() + lat lon datetime id/ + id_toilets dist_toilets id_fast_food ... + 0 39.984094 116.319236 2008-10-23 05:53:05 1/ + 274942287 4132.229067 276320137 ... + 1 39.984198 116.319322 2008-10-23 05:53:06 1/ + 274942287 4135.240296 276320137 ... + 2 39.984224 116.319402 2008-10-23 05:53:11 1/ + 274942287 4140.698090 276320137 ... + 3 39.984211 116.319389 2008-10-23 05:53:16 1/ + 274942287 4140.136625 276320137 ... + 4 39.984217 116.319422 2008-10-23 05:53:21 1/ + 274942287 4142.564150 276320137 ... """ logger.debug('Integration with POIs...') @@ -591,6 +975,44 @@ def join_with_poi_datetime( label_event_type : str, optional Label of df_events referring to the type of the event, by default EVENT_TYPE + Examples + -------- + >>> from pymove.utils.integration import join_with_poi_datetime + >>> POIs_events + unique_id osmid element_type amenity\ + fee + 0 node/269492188 269492188 node toilets\ + no... + 1 node/931686797 931686797 node post_office\ + NaN... + 2 node/992592626 992592626 node parking\ + NaN... + 3 node/1423043074 1423043074 node car_wash\ + NaN... + 4 node/1803755348 1803755348 node telephone\ + NaN... 
+ >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> join_with_poi_datetime(df_7, POIs_events, label_date='datetime', + time_window=900, label_event_id='osmid', label_event_type='amenity') + >>> move_df.head() + lat lon datetime id\ + osmid dist_event amenity + 0 39.984094 116.319236 2008-10-23 05:53:05 1\ + 269492188 4422.237186 toilets + 1 39.984198 116.319322 2008-10-23 05:53:06 1\ + 269492188 4430.488277 toilets + 2 39.984224 116.319402 2008-10-23 05:53:11 1\ + 269492188 4437.521909 toilets + 3 39.984211 116.319389 2008-10-23 05:53:16 1\ + 269492188 4436.297310 toilets + 4 39.984217 116.319422 2008-10-23 05:53:21 1\ + 269492188 4439.154806 toilets """ values = _reset_set_window__and_creates_event_id_type( data, df_events, label_date, time_window @@ -668,6 +1090,43 @@ def join_with_poi_datetime_optimizer( label_event_type : str, optional Label of df_events referring to the type of the event, by default EVENT_TYPE + Examples + -------- + >>> from pymove.utils.integration import join_with_poi_datetime_optimizer + >>> POIs_events + unique_id event_id element_type event_type\ + fee + 0 node/269492188 269492188 node toilets\ + no... + 1 node/931686797 931686797 node post_office\ + NaN... + 2 node/992592626 992592626 node parking\ + NaN... + 3 node/1423043074 1423043074 node car_wash\ + NaN... + 4 node/1803755348 1803755348 node telephone\ + NaN... 
+ >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> join_with_poi_datetime_optimizer(df_8, POIs_events) + >>> move_df.head() + lat lon datetime id\ + event_id dist_event event_type + 0 39.984094 116.319236 2008-10-23 05:53:05 1\ + 269492188 4422.237186 toilets + 1 39.984198 116.319322 2008-10-23 05:53:06 1\ + 269492188 4430.488277 toilets + 2 39.984224 116.319402 2008-10-23 05:53:11 1\ + 269492188 4437.521909 toilets + 3 39.984211 116.319389 2008-10-23 05:53:16 1\ + 269492188 4436.297310 toilets + 4 39.984217 116.319422 2008-10-23 05:53:21 1\ + 269492188 4439.154806 toilets """ values = _reset_set_window__and_creates_event_id_type( data, df_events, label_date, time_window @@ -769,6 +1228,43 @@ def join_with_pois_by_dist_and_datetime( radius: float, optional maximum radius of pois, by default 1000 + Examples + -------- + >>> from pymove.utils.integration import join_with_pois_by_dist_and_datetime + >>> POIs_events + unique_id event_id element_type event_type\ + fee + 0 node/269492188 269492188 node toilets\ + no... + 1 node/931686797 931686797 node post_office\ + NaN... + 2 node/992592626 992592626 node parking\ + NaN... + 3 node/1423043074 1423043074 node car_wash\ + NaN... + 4 node/1803755348 1803755348 node telephone\ + NaN... 
+ >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> join_with_poi_datetime_optimizer(df_8, POIs_events) + >>> move_df.head() + lat lon datetime id\ + event_id dist_event event_type + 0 39.984094 116.319236 2008-10-23 05:53:05 1\ + None None None + 1 39.984198 116.319322 2008-10-23 05:53:06 1\ + None None None + 2 39.984224 116.319402 2008-10-23 05:53:11 1\ + None None None + 3 39.984211 116.319389 2008-10-23 05:53:16 1\ + None None None + 4 39.984217 116.319422 2008-10-23 05:53:21 1\ + None None None """ if label_date not in df_pois: raise KeyError("POI's DataFrame must contain a %s column" % label_date) @@ -862,6 +1358,34 @@ def join_with_home_by_id( drop_id_without_home : bool, optional flag as an option to drop id's that don't have houses, by default FALSE + Examples + -------- + >>> from pymove.utils.integration import join_with_home_by_id + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> home_df.head() + lat lon datetime id formatted_address city + 300 39.991574 116.326394 2008-10-23 10:42:03 1 Rua1, n02 ChinaTown + 301 39.991652 116.326414 2008-10-23 10:42:08 1 Rua2, n03 ChinaTown + >>> join_with_home_by_id(move_df, home_df, label_id='id') + >>> move_df.head() + lat lon datetime id\ + dist_home home city + 0 39.984094 116.319236 2008-10-23 05:53:05 1\ + 1031.348370 Rua1, n02 ChinaTown + 1 39.984198 116.319322 2008-10-23 05:53:06 1\ + 1017.690147 Rua1, n02 ChinaTown + 2 39.984224 116.319402 2008-10-23 05:53:11 1\ + 1011.332141 Rua1, n02 ChinaTown + 3 39.984211 116.319389 2008-10-23 05:53:16 1\ + 
1013.152700 Rua1, n02 ChinaTown + 4 39.984217 116.319422 2008-10-23 05:53:21 1\ + 1010.959220 Rua1, n02 ChinaTown """ ids_without_home = [] @@ -948,6 +1472,50 @@ def merge_home_with_poi( Flag that controls the deletion of the columns referring to the id and the distance from the home point, by default True + Examples + -------- + >>> from pymove.utils.integration import merge_home_with_poi + >>> move_df.head() + lat lon datetime id\ + dist_home home city + 0 39.984094 116.319236 2008-10-23 05:53:05 1\ + 1031.348370 Rua1, n02 ChinaTown + 1 39.984198 116.319322 2008-10-23 05:53:06 1\ + 1017.690147 Rua1, n02 ChinaTown + 2 39.984224 116.319402 2008-10-23 05:53:11 1\ + 1011.332141 Rua1, n02 ChinaTown + 3 39.984211 116.319389 2008-10-23 05:53:16 1\ + 1013.152700 Rua1, n02 ChinaTown + 4 39.984217 116.319422 2008-10-23 05:53:21 1\ + 1010.959220 Rua1, n02 ChinaTown + >>> POIs.head() + unique_id osmid element_type amenity fee\ + geometry + 0 node/269492188 269492188 node toilets no\ + POINT (116.26750 39.98087) + 1 node/274942287 274942287 node toilets NaN\ + POINT (116.27358 39.99664) + 2 node/276320137 276320137 node fast_food NaN\ + POINT (116.33756 39.97541) + 3 node/276320142 276320142 node massage NaN\ + POINT (116.33751 39.97546) + 4 node/286242547 286242547 node toilets NaN\ + POINT (116.19982 40.00670) + .... 
+ >>> join_with_pois(move_df, POIs, label_id='osmid', label_poi_name='name') + >>> move_df.head() + id lat lon datetime\ + city id_poi dist_poi name_poi + 0 1 39.984094 116.319236 2008-10-23 05:53:05\ + ChinaTown 557245268 116.862844 太平洋影城(中关村店) + 1 1 39.984198 116.319322 2008-10-23 05:53:06\ + ChinaTown 5572452688 119.142692 太平洋影城(中关村店) + 2 1 39.984224 116.319402 2008-10-23 05:53:11\ + ChinaTown 5572452688 116.595117 太平洋影城(中关村店) + 3 1 39.984211 116.319389 2008-10-23 05:53:16\ + ChinaTown 5572452688 116.257378 太平洋影城(中关村店) + 4 1 39.984217 116.319422 2008-10-23 05:53:21\ + ChinaTown 5572452688 114.886759 太平洋影城(中关村店) """ logger.debug('merge home with POI using shortest distance') idx = data[data[label_dist_home] <= data[label_dist_poi]].index From 13d0a7d6fc3744b46572fd77dae1d1124181697f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor=20Carvalho?= Date: Thu, 10 Jun 2021 09:06:53 -0300 Subject: [PATCH 23/56] Update integration.py --- pymove/utils/integration.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pymove/utils/integration.py b/pymove/utils/integration.py index dadf74c9..e02dabf0 100644 --- a/pymove/utils/integration.py +++ b/pymove/utils/integration.py @@ -834,14 +834,14 @@ def join_with_pois_by_category( label_id: Optional[Text] = TRAJ_ID ): """ - Performs the integration between trajectories of interest points. + Performs the integration between trajectories and points of interest. Generating new columns referring to the category and distance from the nearest point of interest that has this category at each point of the trajectory. - Examples - -------- + Parameters + ---------- data : DataFrame The input trajectory data. 
df_pois : DataFrame @@ -851,8 +851,8 @@ def join_with_pois_by_category( label_id : str, optional Label of df_pois referring to the point of interest id, by default TRAJ_ID - Returns - ------- + Examples + -------- >>> from pymove.utils.integration import join_with_pois_by_category >>> POIs.head() unique_id osmid element_type amenity fee/ From 92f00650c74d5633aeaf215d07b9ce50b9a7a785 Mon Sep 17 00:00:00 2001 From: flych3r Date: Thu, 10 Jun 2021 10:55:07 -0300 Subject: [PATCH 24/56] added files to mypy in setup.cfg --- setup.cfg | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index 7b4e4f12..5dc0bff0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,6 +9,7 @@ docstring-convention = numpy [mypy] ignore_missing_imports = True no_warn_no_return = True +files = pymove [isort] multi_line_output = 3 @@ -16,9 +17,6 @@ include_trailing_comma = True line_length = 90 known_third_party = IPython,branca,dask,dateutil,folium,geohash2,geopandas,holidays,ipywidgets,joblib,matplotlib,numpy,pandas,psutil,scipy,setuptools,shapely,sklearn,tqdm -[aliases] -test = pytest - [tool:pytest] addopts = --ignore notebooks From bc94196f3574b161c94719b55a64fc6668c0a373 Mon Sep 17 00:00:00 2001 From: flych3r Date: Thu, 10 Jun 2021 12:19:46 -0300 Subject: [PATCH 25/56] added inplace to union --- pymove/preprocessing/compression.py | 2 +- pymove/tests/test_utils_integration.py | 29 ++-- pymove/utils/integration.py | 206 +++++++++++++++++++++---- 3 files changed, 188 insertions(+), 49 deletions(-) diff --git a/pymove/preprocessing/compression.py b/pymove/preprocessing/compression.py index 3be11a53..f79963ee 100644 --- a/pymove/preprocessing/compression.py +++ b/pymove/preprocessing/compression.py @@ -94,7 +94,7 @@ def compress_segment_stop_to_point( """ if not inplace: - move_data = move_data[:] + move_data = move_data.copy() if (label_segment not in move_data) & (label_stop not in move_data): create_or_update_move_stop_by_dist_time( diff --git 
a/pymove/tests/test_utils_integration.py b/pymove/tests/test_utils_integration.py index 19f1c713..a9e32521 100644 --- a/pymove/tests/test_utils_integration.py +++ b/pymove/tests/test_utils_integration.py @@ -136,7 +136,7 @@ def test_union_poi_bank(): index=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ) - integration.union_poi_bank(pois_df, TYPE_POI) + integration.union_poi_bank(pois_df, TYPE_POI, inplace=True) assert_frame_equal(pois_df, expected) @@ -163,7 +163,7 @@ def test_union_poi_bus_station(): index=[0, 1, 2, 3, 4, 5, 6, 7] ) - integration.union_poi_bus_station(pois_df, TYPE_POI) + integration.union_poi_bus_station(pois_df, TYPE_POI, inplace=True) assert_frame_equal(pois_df, expected) @@ -190,7 +190,7 @@ def test_union_poi_bar_restaurant(): index=[0, 1, 2, 3, 4, 5, 6, 7] ) - integration.union_poi_bar_restaurant(pois_df, TYPE_POI) + integration.union_poi_bar_restaurant(pois_df, TYPE_POI, inplace=True) assert_frame_equal(pois_df, expected) @@ -217,7 +217,7 @@ def test_union_poi_parks(): index=[0, 1, 2, 3, 4, 5, 6, 7] ) - integration.union_poi_parks(pois_df, TYPE_POI) + integration.union_poi_parks(pois_df, TYPE_POI, inplace=True) assert_frame_equal(pois_df, expected) @@ -244,38 +244,35 @@ def test_union_poi_police(): index=[0, 1, 2, 3, 4, 5, 6, 7] ) - integration.union_poi_police(pois_df, TYPE_POI) + integration.union_poi_police(pois_df, TYPE_POI, inplace=True) assert_frame_equal(pois_df, expected) -# Testes de Joins def test_join_colletive_areas(): move_df = DataFrame( data=list_move, - columns=[LATITUDE, LONGITUDE, DATETIME, TRAJ_ID]) - gdf = geopandas.GeoDataFrame( + columns=[LATITUDE, LONGITUDE, DATETIME, TRAJ_ID] + ) + move_df = geopandas.GeoDataFrame( move_df, geometry=geopandas.points_from_xy( move_df.lon, move_df.lat ) ) - indexes_ac = np.linspace(0, gdf.shape[0], 5) - area_c = gdf[gdf.index.isin(indexes_ac)].copy() + indexes_ac = np.linspace(0, move_df.shape[0], 5) + area_c = move_df[move_df.index.isin(indexes_ac)].copy() - integration.join_collective_areas(gdf, 
area_c) + integration.join_collective_areas(move_df, area_c, inplace=True) - expected_df = DataFrame( - data=list_move, - columns=[LATITUDE, LONGITUDE, DATETIME, TRAJ_ID]) expected = geopandas.GeoDataFrame( move_df, geometry=geopandas.points_from_xy( move_df.lon, move_df.lat ) ) - expected[VIOLATING] = [True, False, True, False, True, False, True, False, False] - assert_frame_equal(gdf, expected) + + assert_frame_equal(move_df, expected) def test__reset_and_creates_id_and_lat_lon(): diff --git a/pymove/utils/integration.py b/pymove/utils/integration.py index a69639a6..0df1aa9b 100644 --- a/pymove/utils/integration.py +++ b/pymove/utils/integration.py @@ -49,7 +49,12 @@ from pymove.utils.log import logger, progress_bar -def union_poi_bank(data: DataFrame, label_poi: Text = TYPE_POI): +def union_poi_bank( + data: DataFrame, + label_poi: Text = TYPE_POI, + banks: Optional[List[Text]] = None, + inplace: bool = False +) -> Optional[DataFrame]: """ Performs the union between the different bank categories. @@ -61,6 +66,23 @@ def union_poi_bank(data: DataFrame, label_poi: Text = TYPE_POI): Input points of interest data label_poi : str, optional Label referring to the Point of Interest category, by default TYPE_POI + banks : list of str, optional + Names of poi refering to banks, by default + banks = [ + 'bancos_filiais', + 'bancos_agencias', + 'bancos_postos', + 'bancos_PAE', + 'bank', + ] + inplace : boolean, optional + if set to true the original dataframe will be altered to contain + the result of the filtering, otherwise a copy will be returned, by default False + + Returns + ------- + DataFrame + data with poi or None Examples -------- @@ -90,20 +112,30 @@ def union_poi_bank(data: DataFrame, label_poi: Text = TYPE_POI): 8 39.984606 116.319732 9 banks 9 39.984555 116.319728 10 banks """ + if not inplace: + data = data.copy() logger.debug('union bank categories to one category') logger.debug('... 
There are {} -- {}'.format(data[label_poi].nunique(), label_poi)) - banks = [ - 'bancos_filiais', - 'bancos_agencias', - 'bancos_postos', - 'bancos_PAE', - 'bank', - ] + if banks is None: + banks = [ + 'bancos_filiais', + 'bancos_agencias', + 'bancos_postos', + 'bancos_PAE', + 'bank', + ] filter_bank = data[label_poi].isin(banks) data.at[data[filter_bank].index, label_poi] = 'banks' + if not inplace: + return data -def union_poi_bus_station(data: DataFrame, label_poi: Text = TYPE_POI): +def union_poi_bus_station( + data: DataFrame, + label_poi: Text = TYPE_POI, + bus_stations: Optional[List[Text]] = None, + inplace: bool = False +) -> Optional[DataFrame]: """ Performs the union between the different bus station categories. @@ -115,6 +147,20 @@ def union_poi_bus_station(data: DataFrame, label_poi: Text = TYPE_POI): Input points of interest data label_poi : str, optional Label referring to the Point of Interest category, by default TYPE_POI + bus_stations : list of str, optional + Names of poi refering to bus_stations, by default + bus_stations = [ + 'transit_station', + 'pontos_de_onibus' + ] + inplace : boolean, optional + if set to true the original dataframe will be altered to contain + the result of the filtering, otherwise a copy will be returned, by default False + + Returns + ------- + DataFrame + data with poi or None Examples -------- @@ -130,7 +176,6 @@ def union_poi_bus_station(data: DataFrame, label_poi: Text = TYPE_POI): 6 39.984674 116.319810 7 bus_station 7 39.984623 116.319773 8 bus_station >>> union_poi_bus_station(pois_df) - >>> pois_df lat lon id type_poi 0 39.984094 116.319236 1 bus_station 1 39.984198 116.319322 2 randomvalue @@ -141,14 +186,28 @@ def union_poi_bus_station(data: DataFrame, label_poi: Text = TYPE_POI): 6 39.984674 116.319810 7 bus_station 7 39.984623 116.319773 8 bus_station """ + if not inplace: + data = data.copy() logger.debug('union bus station categories to one category') + if bus_stations is None: + bus_stations = [ + 
'transit_station', + 'pontos_de_onibus' + ] filter_bus_station = data[label_poi].isin( - ['transit_station', 'pontos_de_onibus'] + bus_stations ) data.at[data[filter_bus_station].index, label_poi] = 'bus_station' + if not inplace: + return data -def union_poi_bar_restaurant(data: DataFrame, label_poi: Text = TYPE_POI): +def union_poi_bar_restaurant( + data: DataFrame, + label_poi: Text = TYPE_POI, + bar_restaurant: Optional[List[Text]] = None, + inplace: bool = False +) -> Optional[DataFrame]: """ Performs the union between bar and restaurant categories. @@ -160,6 +219,20 @@ def union_poi_bar_restaurant(data: DataFrame, label_poi: Text = TYPE_POI): Input points of interest data label_poi : str, optional Label referring to the Point of Interest category, by default TYPE_POI + bar_restaurant : list of str, optional + Names of poi refering to bars or restaurants, by default + bar_restaurant = [ + 'restaurant', + 'bar' + ] + inplace : boolean, optional + if set to true the original dataframe will be altered to contain + the result of the filtering, otherwise a copy will be returned, by default False + + Returns + ------- + DataFrame + data with poi or None Examples -------- @@ -175,7 +248,6 @@ def union_poi_bar_restaurant(data: DataFrame, label_poi: Text = TYPE_POI): 6 39.984674 116.319810 7 random123 7 39.984623 116.319773 8 123 >>> union_poi_bar_restaurant(pois_df) - >>> pois_df lat lon id type_poi 0 39.984094 116.319236 1 bar-restaurant 1 39.984198 116.319322 2 bar-restaurant @@ -186,12 +258,23 @@ def union_poi_bar_restaurant(data: DataFrame, label_poi: Text = TYPE_POI): 6 39.984674 116.319810 7 random123 7 39.984623 116.319773 8 123 """ + if not inplace: + data = data.copy() logger.debug('union restaurant and bar categories to one category') - filter_bar_restaurant = data[label_poi].isin(['restaurant', 'bar']) + if bar_restaurant is None: + bar_restaurant = ['restaurant', 'bar'] + filter_bar_restaurant = data[label_poi].isin(bar_restaurant) 
data.at[data[filter_bar_restaurant].index, label_poi] = 'bar-restaurant' + if not inplace: + return data -def union_poi_parks(data: DataFrame, label_poi: Text = TYPE_POI): +def union_poi_parks( + data: DataFrame, + label_poi: Text = TYPE_POI, + parks: Optional[List[Text]] = None, + inplace: bool = False +) -> Optional[DataFrame]: """ Performs the union between park categories. @@ -203,6 +286,20 @@ def union_poi_parks(data: DataFrame, label_poi: Text = TYPE_POI): Input points of interest data label_poi : str, optional Label referring to the Point of Interest category, by default TYPE_POI + parks : list of str, optional + Names of poi refering to parks, by default + parks = [ + 'pracas_e_parques', + 'park' + ] + inplace : boolean, optional + if set to true the original dataframe will be altered to contain + the result of the filtering, otherwise a copy will be returned, by default False + + Returns + ------- + DataFrame + data with poi or None Examples -------- @@ -228,12 +325,23 @@ def union_poi_parks(data: DataFrame, label_poi: Text = TYPE_POI): 6 39.984674 116.319810 7 parks 7 39.984623 116.319773 8 parks """ + if not inplace: + data = data.copy() logger.debug('union parks categories to one category') - filter_parks = data[label_poi].isin(['pracas_e_parques', 'park']) + if parks is None: + parks = ['pracas_e_parques', 'park'] + filter_parks = data[label_poi].isin(parks) data.at[data[filter_parks].index, label_poi] = 'parks' + if not inplace: + return data -def union_poi_police(data: DataFrame, label_poi: Text = TYPE_POI): +def union_poi_police( + data: DataFrame, + label_poi: Text = TYPE_POI, + police: Optional[List[Text]] = None, + inplace: bool = False +) -> Optional[DataFrame]: """ Performs the union between police categories. 
@@ -245,6 +353,20 @@ def union_poi_police(data: DataFrame, label_poi: Text = TYPE_POI): Input points of interest data label_poi : str, optional Label referring to the Point of Interest category, by default TYPE_POI + police : list of str, optional + Names of poi refering to police stations, by default + police = [ + 'distritos_policiais', + 'delegacia' + ] + inplace : boolean, optional + if set to true the original dataframe will be altered to contain + the result of the filtering, otherwise a copy will be returned, by default False + + Returns + ------- + DataFrame + data with poi or None Examples -------- @@ -260,7 +382,6 @@ def union_poi_police(data: DataFrame, label_poi: Text = TYPE_POI): 6 39.984674 116.319810 7 123 7 39.984623 116.319773 8 bus_station >>> union_poi_police(pois_df) - >>> pois_df lat lon id type_poi 0 39.984094 116.319236 1 police 1 39.984198 116.319322 2 police @@ -271,14 +392,23 @@ def union_poi_police(data: DataFrame, label_poi: Text = TYPE_POI): 6 39.984674 116.319810 7 123 7 39.984623 116.319773 8 bus_station """ + if not inplace: + data = data.copy() logger.debug('union distritos policies and police categories') - filter_police = data[label_poi] == 'distritos_policiais' + if police is None: + police = ['distritos_policiais', 'delegacia'] + filter_police = data[label_poi].isin(police) data.at[data[filter_police].index, label_poi] = 'police' + if not inplace: + return data def join_collective_areas( - gdf_: DataFrame, gdf_rules_: DataFrame, label_geometry: Text = GEOMETRY -): + data: DataFrame, + areas: DataFrame, + label_geometry: Text = GEOMETRY, + inplace: bool = False +) -> Optional[DataFrame]: """ Performs the integration between trajectories and collective areas. 
@@ -287,17 +417,25 @@ def join_collective_areas( Parameters ---------- - gdf_ : geopandas.GeoDataFrame + data : geopandas.GeoDataFrame The input trajectory data - gdf_rules_ : geopandas.GeoDataFrame + areas : geopandas.GeoDataFrame The input coletive areas data label_geometry : str, optional Label referring to the Point of Interest category, by default GEOMETRY + inplace : boolean, optional + if set to true the original dataframe will be altered to contain + the result of the filtering, otherwise a copy will be returned, by default False + + Returns + ------- + DataFrame + data with joined geometries or None Examples -------- >>> from pymove.utils.integration import join_collective_areas - >>> gdf.head() + >>> data lat lon datetime id geometry 0 39.984094 116.319236 2008-10-23 05:53:05 1 POINT (116.31924 39.98409) 1 39.984198 116.319322 2008-10-23 05:53:06 1 POINT (116.31932 39.98420) @@ -314,8 +452,8 @@ def join_collective_areas( POINT (116.30616 40.01412) 1500 39.979009 116.326873 2008-10-24 00:11:29 1\ POINT (116.32687 39.97901) - >>> join_collective_areas(gdf, area_c) - >>> gdf.head() + >>> join_collective_areas(data, area_c) + lat lon datetime id\ geometry violating 0 39.984094 116.319236 2008-10-23 05:53:05 1\ @@ -329,15 +467,19 @@ def join_collective_areas( 4 39.984217 116.319422 2008-10-23 05:53:21 1\ POINT (116.31942 39.98422) False """ + if not inplace: + data = data.copy() logger.debug('Integration between trajectories and collectives areas') - polygons = gdf_rules_[label_geometry].unique() - gdf_[VIOLATING] = False + polygons = areas[label_geometry].unique() + data[VIOLATING] = False for p in progress_bar(polygons, desc='Joining trajectories and areas'): - # intersects = gdf_[label_geometry].apply(lambda x: x.intersects(p)) - intersects = gdf_[label_geometry].intersects(p) - index = gdf_[intersects].index - gdf_.at[index, VIOLATING] = True + # intersects = data[label_geometry].apply(lambda x: x.intersects(p)) + intersects = 
data[label_geometry].intersects(p) + index = data[intersects].index + data.at[index, VIOLATING] = True + if not inplace: + return data def _reset_and_creates_id_and_lat_lon( From ec54dc4bf11380f18fec8f96db0bdf4a9c12325a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor?= Date: Thu, 10 Jun 2021 13:44:38 -0300 Subject: [PATCH 26/56] adjust 01 --- pymove/utils/integration.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pymove/utils/integration.py b/pymove/utils/integration.py index a69639a6..d1ce2976 100644 --- a/pymove/utils/integration.py +++ b/pymove/utils/integration.py @@ -305,15 +305,16 @@ def join_collective_areas( 3 39.984211 116.319389 2008-10-23 05:53:16 1 POINT (116.31939 39.98421) 4 39.984217 116.319422 2008-10-23 05:53:21 1 POINT (116.31942 39.98422) >>> area_c - lat lon datetime id geometry\ - 0 39.984094 116.319236 2008-10-23 05:53:05 1\ - POINT (116.31924 39.98409) - 500 40.006436 116.317701 2008-10-23 10:53:31 1\ - POINT (116.31770 40.00644) - 1000 40.014125 116.306159 2008-10-23 23:43:56 1\ - POINT (116.30616 40.01412) - 1500 39.979009 116.326873 2008-10-24 00:11:29 1\ - POINT (116.32687 39.97901) + lat lon datetime id\ + geometry + 0 39.984094 116.319236 2008-10-23 05:53:05 1\ + POINT (116.31924 39.98409) + 1 40.006436 116.317701 2008-10-23 10:53:31 1\ + POINT (116.31770 40.00644) + 2 40.014125 116.306159 2008-10-23 23:43:56 1\ + POINT (116.30616 40.01412) + 3 39.979009 116.326873 2008-10-24 00:11:29 1\ + POINT (116.32687 39.97901) >>> join_collective_areas(gdf, area_c) >>> gdf.head() lat lon datetime id\ From 4f665522ebfc886c3c8fb0bf4e527f325b51e8ff Mon Sep 17 00:00:00 2001 From: flych3r Date: Fri, 11 Jun 2021 01:06:37 -0300 Subject: [PATCH 27/56] changed table to markdown --- README.md | 147 +++++++++--------------------------------------------- 1 file changed, 23 insertions(+), 124 deletions(-) diff --git a/README.md b/README.md index 954f4bcb..95c3ef49 100644 --- a/README.md +++ b/README.md @@ 
-4,120 +4,19 @@ ## Information - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Package Status - - Package status - -
    License - - Package license - -
    Python Version - - Python compatible versions - -
    Platforms - - Platforms - -
    All Platforms - - conda-forge build status - -
    PyPi Downloads - - PyPi downloads - -
    PyPi version - - PyPi version - -
    Conda Downloads - - Conda downloads - -
    Conda version - - Conda version - -
    Stars - - Github stars - -
    Forks - - Github forks - -
    Issues - - Github issues - -
    Code Quality - - Code quality - -
    Code Coverage - - Code coverage - -
    +||| +|--- |--- | +|Package Status|![https://pypi.org/project/pymove/](https://img.shields.io/pypi/status/pymove?style=for-the-badge)| +|License|![https://github.com/InsightLab/PyMove/blob/master/LICENSE](https://img.shields.io/badge/License-MIT-yellow?style=for-the-badge)| +|Python Version|![https://www.python.org/doc/versions/](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9-blue?style=for-the-badge)| +|Platforms|![https://anaconda.org/conda-forge/pymove](https://img.shields.io/conda/pn/conda-forge/pymove?style=for-the-badge)| +|Build Status|![https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=9753&branchName=master](https://img.shields.io/azure-devops/build/conda-forge/84710dde-1620-425b-80d0-4cf5baca359d/9753/master?style=for-the-badge)| +|PyPi version|![https://pypi.org/project/pymove/#history](https://img.shields.io/pypi/v/pymove?style=for-the-badge)| +|PyPi Downloads|![https://pypi.org/project/pymove/#files](https://img.shields.io/pypi/dm/pymove?style=for-the-badge)| +|Conda version|![https://anaconda.org/conda-forge/pymove](https://img.shields.io/conda/vn/conda-forge/pymove?style=for-the-badge)| +|Conda Downloads|![https://anaconda.org/conda-forge/pymove](https://img.shields.io/conda/dn/conda-forge/pymove?style=for-the-badge)| +|Code Quality|![https://www.codacy.com/gh/InsightLab/PyMove?utm_source=github.com&utm_medium=referral&utm_content=InsightLab/PyMove&utm_campaign=Badge_Grade](https://img.shields.io/codacy/grade/26c581fbe1ee42e78a9adc50b7372ceb?style=for-the-badge)| +|Code Coverage|![https://www.codacy.com/gh/InsightLab/PyMove?utm_source=github.com&utm_medium=referral&utm_content=InsightLab/PyMove&utm_campaign=Badge_Coverage](https://img.shields.io/codacy/coverage/26c581fbe1ee42e78a9adc50b7372ceb?style=for-the-badge)| --- @@ -303,18 +202,18 @@ The library was originally created during the bachelor's thesis of 2 students fr ```txt @mastersthesis{arina2019, - title = {Uma Arquitetura E 
Implementação Do Módulo De Pré-processamento Para Biblioteca Pymove}, - author = {Arina De Jesus Amador Monteiro Sanches}, - year = 2019, - school = {Universidade Federal Do Ceará}, - type = {Bachelor's thesis} + title = {Uma Arquitetura E Implementação Do Módulo De Pré-processamento Para Biblioteca Pymove}, + author = {Arina De Jesus Amador Monteiro Sanches}, + year = 2019, + school = {Universidade Federal Do Ceará}, + type = {Bachelor's thesis} } @mastersthesis{andreza2019, - title = {Uma Arquitetura E Implementação Do Módulo De Visualizaçãopara Biblioteca Pymove}, - author = {Andreza Fernandes De Oliveira}, - year = 2019, - school = {Universidade Federal Do Ceará}, - type = {Bachelor's thesis} + title = {Uma Arquitetura E Implementação Do Módulo De Visualizaçãopara Biblioteca Pymove}, + author = {Andreza Fernandes De Oliveira}, + year = 2019, + school = {Universidade Federal Do Ceará}, + type = {Bachelor's thesis} } ``` From ddacdbd6a57bd1924c1dc2fa7793078b7fd4a1be Mon Sep 17 00:00:00 2001 From: Matheus Xavier Sampaio Date: Mon, 14 Jun 2021 15:09:35 -0300 Subject: [PATCH 28/56] fix table links --- README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 95c3ef49..e18b7512 100644 --- a/README.md +++ b/README.md @@ -6,17 +6,17 @@ ||| |--- |--- | -|Package Status|![https://pypi.org/project/pymove/](https://img.shields.io/pypi/status/pymove?style=for-the-badge)| -|License|![https://github.com/InsightLab/PyMove/blob/master/LICENSE](https://img.shields.io/badge/License-MIT-yellow?style=for-the-badge)| -|Python Version|![https://www.python.org/doc/versions/](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9-blue?style=for-the-badge)| -|Platforms|![https://anaconda.org/conda-forge/pymove](https://img.shields.io/conda/pn/conda-forge/pymove?style=for-the-badge)| -|Build 
Status|![https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=9753&branchName=master](https://img.shields.io/azure-devops/build/conda-forge/84710dde-1620-425b-80d0-4cf5baca359d/9753/master?style=for-the-badge)| -|PyPi version|![https://pypi.org/project/pymove/#history](https://img.shields.io/pypi/v/pymove?style=for-the-badge)| -|PyPi Downloads|![https://pypi.org/project/pymove/#files](https://img.shields.io/pypi/dm/pymove?style=for-the-badge)| -|Conda version|![https://anaconda.org/conda-forge/pymove](https://img.shields.io/conda/vn/conda-forge/pymove?style=for-the-badge)| -|Conda Downloads|![https://anaconda.org/conda-forge/pymove](https://img.shields.io/conda/dn/conda-forge/pymove?style=for-the-badge)| -|Code Quality|![https://www.codacy.com/gh/InsightLab/PyMove?utm_source=github.com&utm_medium=referral&utm_content=InsightLab/PyMove&utm_campaign=Badge_Grade](https://img.shields.io/codacy/grade/26c581fbe1ee42e78a9adc50b7372ceb?style=for-the-badge)| -|Code Coverage|![https://www.codacy.com/gh/InsightLab/PyMove?utm_source=github.com&utm_medium=referral&utm_content=InsightLab/PyMove&utm_campaign=Badge_Coverage](https://img.shields.io/codacy/coverage/26c581fbe1ee42e78a9adc50b7372ceb?style=for-the-badge)| +|Package Status|[](https://pypi.org/project/pymove/)| +|License|[](https://github.com/InsightLab/PyMove/blob/master/LICENSE)| +|Python Version|[](https://www.python.org/doc/versions/)| +|Platforms|[](https://anaconda.org/conda-forge/pymove)| +|Build Status|[](https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=9753&branchName=master)| +|PyPi version|[](https://pypi.org/project/pymove/#history)| +|PyPi Downloads|[](https://pypi.org/project/pymove/#files)| +|Conda version|[](https://anaconda.org/conda-forge/pymove)| +|Conda Downloads|[](https://anaconda.org/conda-forge/pymove/files)| +|Code Quality|[](https://app.codacy.com/gh/InsightLab/PyMove/dashboard)| +|Code 
Coverage|[](https://app.codacy.com/gh/InsightLab/PyMove/files)| --- From 60f278fd62ca070efe9a62729fb783eb60e8d414 Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 15 Jun 2021 22:51:36 -0300 Subject: [PATCH 29/56] remove geopandas dataframe dependency from integration --- pymove/tests/test_utils_integration.py | 19 +++----- pymove/utils/integration.py | 61 +++++++++++++------------- requirements-dev.txt | 1 - setup.cfg | 2 +- 4 files changed, 37 insertions(+), 46 deletions(-) diff --git a/pymove/tests/test_utils_integration.py b/pymove/tests/test_utils_integration.py index a9e32521..e8fb6e1c 100644 --- a/pymove/tests/test_utils_integration.py +++ b/pymove/tests/test_utils_integration.py @@ -1,10 +1,10 @@ -import geopandas import numpy as np import pandas as pd from numpy import inf, nan from numpy.testing import assert_array_almost_equal, assert_array_equal from pandas import DataFrame, Series, Timestamp from pandas.testing import assert_frame_equal, assert_series_equal +from shapely.geometry.point import Point from pymove import MoveDataFrame from pymove.utils import integration @@ -250,26 +250,17 @@ def test_union_poi_police(): def test_join_colletive_areas(): - move_df = DataFrame( + move_df = MoveDataFrame( data=list_move, - columns=[LATITUDE, LONGITUDE, DATETIME, TRAJ_ID] - ) - move_df = geopandas.GeoDataFrame( - move_df, geometry=geopandas.points_from_xy( - move_df.lon, move_df.lat - ) ) + move_df['geometry'] = move_df.apply(lambda x: Point(x['lon'], x['lat']), axis=1) + expected = move_df.copy() - indexes_ac = np.linspace(0, move_df.shape[0], 5) + indexes_ac = np.linspace(0, move_df.shape[0], 5, dtype=int) area_c = move_df[move_df.index.isin(indexes_ac)].copy() integration.join_collective_areas(move_df, area_c, inplace=True) - expected = geopandas.GeoDataFrame( - move_df, geometry=geopandas.points_from_xy( - move_df.lon, move_df.lat - ) - ) expected[VIOLATING] = [True, False, True, False, True, False, True, False, False] assert_frame_equal(move_df, expected) 
diff --git a/pymove/utils/integration.py b/pymove/utils/integration.py index c1a2b4ac..2b519872 100644 --- a/pymove/utils/integration.py +++ b/pymove/utils/integration.py @@ -18,6 +18,7 @@ """ +from collections import namedtuple from typing import List, Optional, Text, Tuple import numpy as np @@ -436,47 +437,47 @@ def join_collective_areas( -------- >>> from pymove.utils.integration import join_collective_areas >>> data - lat lon datetime id geometry - 0 39.984094 116.319236 2008-10-23 05:53:05 1 POINT (116.31924 39.98409) - 1 39.984198 116.319322 2008-10-23 05:53:06 1 POINT (116.31932 39.98420) - 2 39.984224 116.319402 2008-10-23 05:53:11 1 POINT (116.31940 39.98422) - 3 39.984211 116.319389 2008-10-23 05:53:16 1 POINT (116.31939 39.98421) - 4 39.984217 116.319422 2008-10-23 05:53:21 1 POINT (116.31942 39.98422) + lat lon datetime id geometry + 0 39.984094 116.319236 2008-10-23 05:53:05 1 POINT (116.31924 39.98409) + 1 39.984198 116.319322 2008-10-23 05:53:06 1 POINT (116.31932 39.98420) + 2 39.984224 116.319402 2008-10-23 05:53:11 1 POINT (116.31940 39.98422) + 3 39.984211 116.319389 2008-10-23 05:53:16 1 POINT (116.31939 39.98421) + 4 39.984217 116.319422 2008-10-23 05:53:21 1 POINT (116.31942 39.98422) >>> area_c - lat lon datetime id\ - geometry - 0 39.984094 116.319236 2008-10-23 05:53:05 1\ - POINT (116.31924 39.98409) - 1 40.006436 116.317701 2008-10-23 10:53:31 1\ - POINT (116.31770 40.00644) - 2 40.014125 116.306159 2008-10-23 23:43:56 1\ - POINT (116.30616 40.01412) - 3 39.979009 116.326873 2008-10-24 00:11:29 1\ + lat lon datetime id geometry + 0 39.984094 116.319236 2008-10-23 05:53:05 1 POINT (116.319236 39.984094) + 1 40.006436 116.317701 2008-10-23 10:53:31 1 POINT (116.317701 40.006436) + 2 40.014125 116.306159 2008-10-23 23:43:56 1 POINT (116.306159 40.014125) + 3 39.984211 116.319389 2008-10-23 05:53:16 1 POINT (116.319389 39.984211) POINT (116.32687 39.97901) >>> join_collective_areas(gdf, area_c) >>> gdf.head() - lat lon datetime id\ - 
geometry violating - 0 39.984094 116.319236 2008-10-23 05:53:05 1\ - POINT (116.31924 39.98409) True - 1 39.984198 116.319322 2008-10-23 05:53:06 1\ - POINT (116.31932 39.98420) False - 2 39.984224 116.319402 2008-10-23 05:53:11 1\ - POINT (116.31940 39.98422) False - 3 39.984211 116.319389 2008-10-23 05:53:16 1\ - POINT (116.31939 39.98421) False - 4 39.984217 116.319422 2008-10-23 05:53:21 1\ - POINT (116.31942 39.98422) False + lat lon datetime id \ + geometry violating + 0 39.984094 116.319236 2008-10-23 05:53:05 1 \ + POINT (116.319236 39.984094) True + 1 39.984198 116.319322 2008-10-23 05:53:06 1 \ + POINT (116.319322 39.984198) False + 2 39.984224 116.319402 2008-10-23 05:53:11 1 \ + POINT (116.319402 39.984224) False + 3 39.984211 116.319389 2008-10-23 05:53:16 1 \ + POINT (116.319389 39.984211) True + 4 39.984217 116.319422 2008-10-23 05:53:21 1 \ + POINT (116.319422 39.984217) False + """ if not inplace: data = data.copy() logger.debug('Integration between trajectories and collectives areas') + Geometry = namedtuple('Geometry', 'geom coordinates') - polygons = areas[label_geometry].unique() + polygons = areas[label_geometry].apply( + lambda g: Geometry(g.__class__, g.__geo_interface__.get('coordinates')) + ).unique() + polygons = [p.geom(p.coordinates) for p in polygons] data[VIOLATING] = False for p in progress_bar(polygons, desc='Joining trajectories and areas'): - # intersects = data[label_geometry].apply(lambda x: x.intersects(p)) - intersects = data[label_geometry].intersects(p) + intersects = data[label_geometry].apply(lambda x: x.intersects(p)) index = data[intersects].index data.at[index, VIOLATING] = True if not inplace: diff --git a/requirements-dev.txt b/requirements-dev.txt index dd74c405..5e84d996 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,7 +3,6 @@ coverage flake8 flake8-bugbear flake8-docstrings -geopandas mypy==0.812 pep8-naming pre-commit diff --git a/setup.cfg b/setup.cfg index 7b4e4f12..0f9f962c 100644 --- 
a/setup.cfg +++ b/setup.cfg @@ -14,7 +14,7 @@ no_warn_no_return = True multi_line_output = 3 include_trailing_comma = True line_length = 90 -known_third_party = IPython,branca,dask,dateutil,folium,geohash2,geopandas,holidays,ipywidgets,joblib,matplotlib,numpy,pandas,psutil,scipy,setuptools,shapely,sklearn,tqdm +known_third_party = IPython,branca,dask,dateutil,folium,geohash2,holidays,ipywidgets,joblib,matplotlib,numpy,pandas,psutil,scipy,setuptools,shapely,sklearn,tqdm [aliases] test = pytest From eb402f9746cd8abb214794b17352775b12c96e45 Mon Sep 17 00:00:00 2001 From: flych3r Date: Wed, 16 Jun 2021 11:38:31 -0300 Subject: [PATCH 30/56] removed travis from deployment-instructions --- .bumpversion.cfg | 10 -- .code-style.md | 8 +- .deployment-instructions.md | 223 ++++++++++++------------- .gitattributes | 1 - Makefile | 1 + pymove/tests/test_utils_integration.py | 4 +- pymove/utils/integration.py | 147 +++++----------- setup.cfg | 11 ++ 8 files changed, 167 insertions(+), 238 deletions(-) delete mode 100644 .bumpversion.cfg delete mode 100644 .gitattributes diff --git a/.bumpversion.cfg b/.bumpversion.cfg deleted file mode 100644 index f71c65d0..00000000 --- a/.bumpversion.cfg +++ /dev/null @@ -1,10 +0,0 @@ -[bumpversion] -current_version = 2.7.2 -allow_dirty = True -tag_name = version-{new_version} -tag = True -commit = True - -[bumpversion:file:pymove/__init__.py] - -[bumpversion:file:setup.py] diff --git a/.code-style.md b/.code-style.md index ee71c4f0..9099dd13 100644 --- a/.code-style.md +++ b/.code-style.md @@ -22,6 +22,11 @@ We following the [Numpy](https://numpydoc.readthedocs.io/en/latest/format.html) ## flake8 Flake8 is a python linter that helps to keep the code up to PEP standards. 
+ +## mypy + +Mypy is a static type checker for python + To lint the code, run: `make lint` ## Pre-Commit @@ -85,13 +90,14 @@ pymove/core/dataframe.py:970:29: E711 comparison to None should be 'if cond is N - Don't commit to branch: Doesn't allow direct commits to `master` branch. -- seed isort known_third_party: Populates the `.isort.cfg` file. - isort: Sorts the imports. - flake8: Ensures that the code follows `pylint` and `pyflakes` guidelines. It will point the errors in the code. +- mypy: Performs type checking. + It will point the errors in the code. --- ## Codacy diff --git a/.deployment-instructions.md b/.deployment-instructions.md index 74a88808..cb8ea44a 100644 --- a/.deployment-instructions.md +++ b/.deployment-instructions.md @@ -10,78 +10,67 @@ The link in this tutorial will explain the steps to upload a package to pypi: -#### Get started with Travis CI +#### Use Github Actions to deploy -1. Sing up on Travis-ci with GitHub. - -2. Accept the authorization of Travis CI. - -3. Click on your profile picture in the top right of your Travis Dashboard, - click the green Activate button, and select the repositories - you want to use with Travis CI. - -4. Add a .travis.yml file to your repository to tell Travis CI what to do. - -#### Use the .travis.yml file to configure your deploy - -5. Create an API token to authenticate with PyPI: - 1. In your Pypi account settings, go to API tokens section and +1. Create an API token to authenticate with PyPI: + - In your Pypi account settings, go to API tokens section and select "Add API token" - 2. Add the token to the Github Actions Secret. - -6. 
Create a github action with the following content: - ```yaml - name: Publish to PyPI - on: - push: - tags: - - "*" - - jobs: - build-n-publish: - if: github.event.base_ref == 'refs/heads/' && startsWith(github.ref, 'refs/tags') - name: Build and publish package - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.6 - uses: actions/setup-python@v2 - with: - python-version: 3.6 - - name: Install dependencies - run: | - python -m pip install --upgrade "pip<20" - pip install -r requirements-dev.txt - - name: - run: | - flake8 - pytest - - name: Build - run: | - pip install setuptools wheel twine - python setup.py sdist bdist_wheel - - name: Publish - uses: pypa/gh-action-pypi-publish@master - with: - user: __token__ - password: ${{ secrets.pypi_password }} - ``` + - Add the token to the Github Actions Secret. + +2. Create a github workflow with the following content: +```yaml +name: Publish to PyPI +on: + push: + tags: + - "*" + +jobs: + build-n-publish: + if: github.event.base_ref == 'refs/heads/' && startsWith(github.ref, 'refs/tags') + name: Build and publish package + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.6 + uses: actions/setup-python@v2 + with: + python-version: 3.6 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + make dev + - name: + run: | + make lint + make test + - name: Build + run: | + pip install setuptools wheel twine + python setup.py sdist bdist_wheel + - name: Publish + uses: pypa/gh-action-pypi-publish@master + with: + user: __token__ + password: ${{ secrets.pypi_password }} +``` #### Configure bump2version For the versioning control we a using the package bump2version. 1. Run `pip install bump2version` in your environment -2. 
Add the following attributes to the .bumpversion.cfg file: -```yaml - [bumpversion] - current_version = - allow_dirty = True - tag_name = version-{new_version} - tag = True - commit = True - [bumpversion:file:] +2. Add the following attributes to the setup.cfg file: +```conf +[bumpversion] +current_version = +allow_dirty = True +tag_name = version-{new_version} +tag = True +commit = True +[bumpversion:file:] +[bumpversion:file:] ``` ***Note:*** If `NotADirectoryError: [Errno 20] Not a directory`, @@ -89,22 +78,18 @@ For the versioning control we a using the package bump2version. #### For more information see these links -- - - --- -### Deploy the package using Travis CI - -1. Run the command `bumperversion [major|minor|patch]` to increase the - version number. This will create a new tag and commit the changes. +1. Run the command `bumperversion [major|minor|patch]` to increase the version number. + This will create a new tag and commit the changes. 2. Push the changes to the developer branch. 3. Create a pull request onto master. To deploy pymove to Pypi using - you must be in the master branch, Travis was configured to only allow - deployments from tagged commits on the master branch. + you must be in the master branch, pushing a tagged commit. 4. After merging the new version into the master branch, push the new tag created by bump2version. @@ -133,65 +118,65 @@ With the package published to Pypi, we can easily deploy to the 2. Now add some information to the `/meta.yaml` file. ```yaml - {% set name = %} - {% set version = %} - - package: - name: "{{ name|lower }}" - version: "{{ version }}" - - source: - url: "https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.tar.gz" - sha256: - - build: - number: 0 - script: "{{ PYTHON }} -m pip install . 
-vv" - noarch: python - - requirements: - host: - - pip - - python >=3.6 - run: - - - - python >=3.6 - - test: - imports: - - - - about: - home: - license: - license_family: - license_file: - summary: - doc_url: - dev_url: - - extra: - recipe-maintainers: - - - - +{% set name = %} +{% set version = %} + +package: + name: "{{ name|lower }}" + version: "{{ version }}" + +source: + url: "https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.tar.gz" + sha256: + +build: + number: 0 + script: "{{ PYTHON }} -m pip install . -vv" + noarch: python + +requirements: + host: + - pip + - python >=3.6 + run: + - + - python >=3.6 + +test: + imports: + - + +about: + home: + license: + license_family: + license_file: + summary: + doc_url: + dev_url: + +extra: + recipe-maintainers: + - + - ``` -1. All package run requirements must be avaiable in the conda-forge channel. +All package run requirements must be available in the conda-forge channel. #### Request the publication to the conda-forge channel -2. Fork the example recipes repository at +1. Fork the example recipes repository at -3. Copy the `/meta.yaml` file created in the step above to +2. Copy the `/meta.yaml` file created in the step above to the forked repo `staged-recipes/recipes/example` directory -4. Push the changes to your forked repository. +3. Push the changes to your forked repository. -5. Make a pull request for your repository to the master branch on +4. Make a pull request for your repository to the master branch on the stage-recipes repository. - `conda-forge:master from :` -6. Now, the pull request will be checked. +5. Now, the pull request will be checked. - Comlete the checklist for the pull requests. - The recipe meta.yaml file will be checked by the `conda-forge-linting service`. @@ -199,10 +184,10 @@ With the package published to Pypi, we can easily deploy to the - The recipe will be built for `linux64`, `macos64` and `windows64` systems. -7. 
If there are any problems with the PR, a review team member will give +6. If there are any problems with the PR, a review team member will give you feedback, pointing out improvements and answering questions. -8. Once everything is in order, the pull request will be aproved. +7. Once everything is in order, the pull request will be aproved. --- diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index a03776df..00000000 --- a/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -pymove/_version.py export-subst diff --git a/Makefile b/Makefile index d0059a0f..7df795c3 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,7 @@ dev: clean: rm -rf `find . -type d -name .pytest_cache` + rm -rf `find . -type d -name .mypy_cache` rm -rf `find . -type d -name __pycache__` rm -rf `find . -type d -name .ipynb_checkpoints` rm -rf docs/_build diff --git a/pymove/tests/test_utils_integration.py b/pymove/tests/test_utils_integration.py index e8fb6e1c..d46d4364 100644 --- a/pymove/tests/test_utils_integration.py +++ b/pymove/tests/test_utils_integration.py @@ -371,7 +371,7 @@ def test__reset_set_window__and_creates_event_id_type(): window_starts, window_ends, current_distances, event_id, event_type = ( integration._reset_set_window__and_creates_event_id_type( - move_df, pois, DATETIME, 45000 + move_df, pois, 45000, DATETIME ) ) @@ -440,7 +440,7 @@ def test_reset_set_window_and_creates_event_id_type_all(): window_starts, window_ends, current_distances, event_id, event_type = ( integration._reset_set_window_and_creates_event_id_type_all( - move_df, pois, DATETIME, 7200 + move_df, pois, 7200, DATETIME ) ) diff --git a/pymove/utils/integration.py b/pymove/utils/integration.py index 2b519872..9127510d 100644 --- a/pymove/utils/integration.py +++ b/pymove/utils/integration.py @@ -520,25 +520,17 @@ def _reset_and_creates_id_and_lat_lon( >>> move_df.head() lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984559 116.326696 2008-10-23 10:37:26 1 - 2 40.002899 
116.321520 2008-10-23 10:50:16 1 - 3 40.016238 116.307691 2008-10-23 11:03:06 1 - 4 40.013814 116.306525 2008-10-23 11:58:33 2 >>> pois.head() lat lon id type_poi name_poi 0 39.984094 116.319236 1 policia distrito_pol_1 - 1 39.991013 116.326384 2 policia policia_federal - 2 40.010000 116.312615 3 comercio supermercado_aroldo - 3 40.013821 116.306531 4 show forro_ tropykalia - 4 40.008099 116.317711 5 risca-faca rinha_de_galo_world_cup >>> _reset_and_creates_id_and_lat_lon(move_df, pois) - (array([inf, inf, inf, inf, inf, inf, inf, inf, inf]), - array(['', '', '', '', '', '', '', '', ''], dtype=object), - array(['', '', '', '', '', '', '', '', ''], dtype=object), - array([inf, inf, inf, inf, inf, inf, inf]), - array([inf, inf, inf, inf, inf, inf, inf])) - >>> print(type(_reset_and_creates_id_and_lat_lon(move_df, pois))) - + ( + array([inf]), + array([''], dtype=object), + array([''], dtype=object), + array([inf]), + array([inf]) + ) """ if reset_index: logger.debug('... Resetting index to operation...') @@ -565,7 +557,7 @@ def _reset_and_creates_id_and_lat_lon( def _reset_set_window__and_creates_event_id_type( - data: DataFrame, df_events: DataFrame, label_date: Text, time_window: int + data: DataFrame, df_events: DataFrame, time_window: float, label_date: Text = DATETIME ) -> Tuple[Series, Series, ndarray, ndarray, ndarray]: """ Resets the indexes of the dataframes. @@ -580,10 +572,10 @@ def _reset_set_window__and_creates_event_id_type( The input trajectory data. df_events : DataFrame The input event point of interest data. - label_date : str - Label of data referring to the datetime. - time_window : int + time_window : float Number of seconds of the time window. 
+ label_date : str, optional + Label of data referring to the datetime, by default DATETIME Returns ------- @@ -596,46 +588,19 @@ def _reset_set_window__and_creates_event_id_type( >>> move_df.head() lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984559 116.326696 2008-10-23 10:37:26 1 - 2 40.002899 116.321520 2008-10-23 10:50:16 1 - 3 40.016238 116.307691 2008-10-23 11:03:06 1 - 4 40.013814 116.306525 2008-10-23 11:58:33 2 >>> pois_df - lat lon event_id datetime\ - event_type - 0 39.984094 116.319236 1 2008-10-24 01:57:57\ - show do tropykalia - 1 39.991013 116.326384 2 2008-10-24 00:22:01\ - evento da prefeitura - 2 40.010000 116.312615 3 2008-10-25 00:21:01\ - show do seu joao - 3 40.013821 116.306531 4 2008-10-26 00:22:01\ - missa - >>> _reset_set_window__and_creates_event_id_type(move_df, pois, - 'datetime', 600) - (0 2008-10-23 05:43:05 - 1 2008-10-23 10:27:26 - 2 2008-10-23 10:40:16 - 3 2008-10-23 10:53:06 - 4 2008-10-23 11:48:33 - 5 2008-10-23 23:40:45 - 6 2008-10-23 23:52:14 - 7 2008-10-24 00:12:01 - 8 2008-10-24 01:47:57 - Name: datetime, dtype: datetime64[ns], - 0 2008-10-23 06:03:05 - 1 2008-10-23 10:47:26 - 2 2008-10-23 11:00:16 - 3 2008-10-23 11:13:06 - 4 2008-10-23 12:08:33 - 5 2008-10-24 00:00:45 - 6 2008-10-24 00:12:14 - 7 2008-10-24 00:32:01 - 8 2008-10-24 02:07:57 - Name: datetime, dtype: datetime64[ns], - array([inf, inf, inf, inf, inf, inf, inf, inf, inf]), - array(['', '', '', '', '', '', '', '', ''], dtype=object), - array(['', '', '', '', '', '', '', '', ''], dtype=object)) + lat lon event_id datetime event_type + 0 39.984094 116.319236 1 2008-10-24 01:57:57 show do tropykalia + >>> _reset_set_window__and_creates_event_id_type(move_df, pois, 600) + ( + 0 2008-10-23 05:43:05 + Name: datetime, dtype: datetime64[ns], + 0 2008-10-23 06:03:05 + Name: datetime, dtype: datetime64[ns], + array([inf]), + array([''], dtype=object), + array([''], dtype=object) + ) """ # get a vector with windows time to each point 
data.reset_index(drop=True, inplace=True) @@ -656,7 +621,7 @@ def _reset_set_window__and_creates_event_id_type( def _reset_set_window_and_creates_event_id_type_all( - data: DataFrame, df_events: DataFrame, label_date: Text, time_window: float + data: DataFrame, df_events: DataFrame, time_window: float, label_date: Text = DATETIME ) -> Tuple[Series, Series, ndarray, ndarray, ndarray]: """ Resets the indexes of the dataframes. @@ -671,10 +636,10 @@ def _reset_set_window_and_creates_event_id_type_all( The input trajectory data. df_events : DataFrame The input event point of interest data. - label_date : str - Label of data referring to the datetime. time_window : float Number of seconds of the time window. + label_date : str + Label of data referring to the datetime. Returns ------- @@ -683,51 +648,23 @@ def _reset_set_window_and_creates_event_id_type_all( Examples -------- - >>> from pymove.utils.integration import - _reset_set_window_and_creates_event_id_type_all + >>> from pymove.utils.integration import _reset_set_window_and_creates_event_id_type_all # noqa >>> move_df.head() lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984559 116.326696 2008-10-23 10:37:26 1 - 2 40.002899 116.321520 2008-10-23 10:50:16 1 - 3 40.016238 116.307691 2008-10-23 11:03:06 1 - 4 40.013814 116.306525 2008-10-23 11:58:33 2 >>> pois_df - lat lon event_id datetime\ - event_type - 0 39.984094 116.319236 1 2008-10-24 01:57:57\ - show do tropykalia - 1 39.991013 116.326384 2 2008-10-24 00:22:01\ - evento da prefeitura - 2 40.010000 116.312615 3 2008-10-25 00:21:01\ - show do seu joao - 3 40.013821 116.306531 4 2008-10-26 00:22:01\ - missa - >>> _reset_set_window_and_creates_event_id_type_all(move_df, pois, - 'datetime', 600) - (0 2008-10-23 05:43:05 - 1 2008-10-23 10:27:26 - 2 2008-10-23 10:40:16 - 3 2008-10-23 10:53:06 - 4 2008-10-23 11:48:33 - 5 2008-10-23 23:40:45 - 6 2008-10-23 23:52:14 - 7 2008-10-24 00:12:01 - 8 2008-10-24 01:47:57 - Name: datetime, dtype: 
datetime64[ns], - 0 2008-10-23 06:03:05 - 1 2008-10-23 10:47:26 - 2 2008-10-23 11:00:16 - 3 2008-10-23 11:13:06 - 4 2008-10-23 12:08:33 - 5 2008-10-24 00:00:45 - 6 2008-10-24 00:12:14 - 7 2008-10-24 00:32:01 - 8 2008-10-24 02:07:57 - Name: datetime, dtype: datetime64[ns], - array([None, None, None, None, None, None, None, None, None], dtype=object), - array([None, None, None, None, None, None, None, None, None], dtype=object), - array([None, None, None, None, None, None, None, None, None], dtype=object)) + lat lon event_id datetime event_type + 0 39.984094 116.319236 1 2008-10-24 01:57:57 show do tropykalia + >>> _reset_set_window_and_creates_event_id_type_all(move_df, pois, 600) + ( + 0 2008-10-23 05:43:05 + Name: datetime, dtype: datetime64[ns], + 0 2008-10-23 06:03:05 + Name: datetime, dtype: datetime64[ns], + array([None], dtype=object), + array([None], dtype=object), + array([None], dtype=object) + ) """ # get a vector with windows time to each point data.reset_index(drop=True, inplace=True) @@ -1167,7 +1104,7 @@ def join_with_poi_datetime( """ values = _reset_set_window__and_creates_event_id_type( - data, df_events, label_date, time_window + data, df_events, time_window, label_date ) window_starts, window_ends, current_distances, event_id, event_type = values @@ -1291,7 +1228,7 @@ def join_with_poi_datetime_optimizer( """ values = _reset_set_window__and_creates_event_id_type( - data, df_events, label_date, time_window + data, df_events, time_window, label_date ) window_starts, window_ends, current_distances, event_id, event_type = values @@ -1441,7 +1378,7 @@ def join_with_pois_by_dist_and_datetime( raise KeyError("POI's DataFrame must contain a %s column" % label_date) values = _reset_set_window_and_creates_event_id_type_all( - data, df_pois, label_date, time_window + data, df_pois, time_window, label_date ) window_start, window_end, current_distances, event_id, event_type = values diff --git a/setup.cfg b/setup.cfg index 0f9f962c..7a25badc 100644 --- 
a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,14 @@ +[bumpversion] +current_version = 2.7.2 +allow_dirty = True +tag_name = version-{new_version} +tag = True +commit = True + +[bumpversion:file:pymove/__init__.py] + +[bumpversion:file:setup.py] + [flake8] ignore = E203, E266, W402, W503, F401, F841, D401 max-line-length = 90 From 9fb22cd68f02f6e3a8610332b0a237304d3e1f7f Mon Sep 17 00:00:00 2001 From: flych3r Date: Wed, 16 Jun 2021 14:55:46 -0300 Subject: [PATCH 31/56] list spaces --- .deployment-instructions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.deployment-instructions.md b/.deployment-instructions.md index cb8ea44a..7bb75745 100644 --- a/.deployment-instructions.md +++ b/.deployment-instructions.md @@ -13,10 +13,10 @@ The link in this tutorial will explain the steps to upload a package to pypi: Date: Fri, 18 Jun 2021 13:44:13 -0300 Subject: [PATCH 32/56] changed type to pandasmove --- pymove/core/pandas.py | 4 ++-- pymove/visualization/folium.py | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pymove/core/pandas.py b/pymove/core/pandas.py index 1e48a129..0f0bf91c 100644 --- a/pymove/core/pandas.py +++ b/pymove/core/pandas.py @@ -1144,7 +1144,7 @@ def generate_speed_features( def generate_move_and_stop_by_radius( self, - radius: int = 0, + radius: float = 0, target_label: Text = DIST_TO_PREV, inplace: bool = True ): @@ -1153,7 +1153,7 @@ def generate_move_and_stop_by_radius( Parameters ---------- - radius : int, optional + radius : float, optional Represents radius, by default 0 target_label : str, optional Represents column to compute, by default DIST_TO_PREV diff --git a/pymove/visualization/folium.py b/pymove/visualization/folium.py index 99b3a01e..2eb84256 100644 --- a/pymove/visualization/folium.py +++ b/pymove/visualization/folium.py @@ -35,6 +35,7 @@ from folium.plugins import FastMarkerCluster, HeatMap, HeatMapWithTime, MarkerCluster from pandas import DataFrame +from pymove import 
PandasMoveDataFrame from pymove.preprocessing import filters from pymove.utils import distances from pymove.utils.constants import ( @@ -925,7 +926,7 @@ def plot_trajectory_by_id_folium( def plot_trajectory_by_period( - move_data: DataFrame, + move_data: PandasMoveDataFrame, period: Text, id_: Optional[int] = None, n_rows: Optional[int] = None, @@ -1016,7 +1017,7 @@ def plot_trajectory_by_period( def plot_trajectory_by_day_week( - move_data: DataFrame, + move_data: PandasMoveDataFrame, day_week: Text, id_: Optional[int] = None, n_rows: Optional[int] = None, @@ -1107,7 +1108,7 @@ def plot_trajectory_by_day_week( def plot_trajectory_by_date( - move_data: DataFrame, + move_data: PandasMoveDataFrame, start_date: Union[Text, date], end_date: Union[Text, date], id_: Optional[int] = None, @@ -1207,7 +1208,7 @@ def plot_trajectory_by_date( def plot_trajectory_by_hour( - move_data: DataFrame, + move_data: PandasMoveDataFrame, start_hour: Text, end_hour: Text, id_: Optional[int] = None, @@ -1301,7 +1302,7 @@ def plot_trajectory_by_hour( def plot_stops( - move_data: DataFrame, + move_data: PandasMoveDataFrame, radius: float = 0, weight: float = 3, id_: Optional[int] = None, From 945be1c6599d69967c04c3602fac795cc5dc88ec Mon Sep 17 00:00:00 2001 From: flych3r Date: Wed, 23 Jun 2021 19:45:15 -0300 Subject: [PATCH 33/56] m --- pymove/visualization/folium.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pymove/visualization/folium.py b/pymove/visualization/folium.py index 99b3a01e..3bcd6e18 100644 --- a/pymove/visualization/folium.py +++ b/pymove/visualization/folium.py @@ -1504,7 +1504,6 @@ def _circle_maker( user_lon: str. Longitude column name. slice_tags: list or iterable - user_point: str. Point color. radius: float. 
@@ -1732,20 +1731,22 @@ def show_trajs_with_event( line_color: Text = LINE_COLOR, slice_event_show: Optional[List] = None, slice_subject_show: Optional[List] = None, -) -> List[Map]: +) -> List[Tuple[Map, DataFrame]]: """ Plot a trajectory, including your user_points lat lon and your tags. + For each event, shows users that are in range of time and space. + Parameters ---------- move_data: DataFrame. Trajectory input data. window_time_subject: float. - The subject time window. + The subject time window in seconds. window_time_event: float. - The event time window. + The event time window in seconds. radius: float. - The radius to use. + The radius to use in meters. event_lat: str, optional Event latitude column name, by default LATITUDE. event_lon: str, optional @@ -1845,10 +1846,10 @@ def show_trajs_with_event( user_datetime ) - move_data = df_event[df_event[event_id] == e_id] + event_data = df_event[df_event[event_id] == e_id] base_map = plot_event_folium( - move_data, + event_data, event_lat=event_lat, event_lon=event_lon, event_point=event_point, From 8ecb27b7b990355bc5c080f74f4ec615bacb079e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor=20Carvalho?= Date: Sat, 26 Jun 2021 00:32:37 -0300 Subject: [PATCH 34/56] correction: putting examples on integration module In this commit, I update the examples starting in the join functions to the end --- pymove/utils/integration.py | 449 ++++++++++++++---------------------- 1 file changed, 170 insertions(+), 279 deletions(-) diff --git a/pymove/utils/integration.py b/pymove/utils/integration.py index 9127510d..b494cdc8 100644 --- a/pymove/utils/integration.py +++ b/pymove/utils/integration.py @@ -715,42 +715,28 @@ def join_with_pois( Examples -------- >>> from pymove.utils.integration import join_with_pois - >>> POIs.head() - unique_id osmid element_type amenity fee\ - geometry - 0 node/269492188 269492188 node toilets no\ - POINT (116.26750 39.98087) - 1 node/274942287 274942287 node toilets NaN\ - POINT 
(116.27358 39.99664) - 2 node/276320137 276320137 node fast_food NaN\ - POINT (116.33756 39.97541) - 3 node/276320142 276320142 node massage NaN\ - POINT (116.33751 39.97546) - 4 node/286242547 286242547 node toilets NaN\ - POINT (116.19982 40.00670) - .... - >>> move_df.head() - lat lon datetime id - 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984198 116.319322 2008-10-23 05:53:06 1 - 2 39.984224 116.319402 2008-10-23 05:53:11 1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 - ... - >>> join_with_pois(move_df, POIs, label_id='osmid', label_poi_name='name') - >>> move_df.head() - lat lon datetime id\ - id_poi dist_poi name_poi - 0 39.984094 116.319236 2008-10-23 05:53:05 1\ - 5572452688 116.862844 太平洋影城(中关村店) - 1 39.984198 116.319322 2008-10-23 05:53:06 1\ - 5572452688 119.142692 太平洋影城(中关村店) - 2 39.984224 116.319402 2008-10-23 05:53:11 1\ - 5572452688 116.595117 太平洋影城(中关村店) - 3 39.984211 116.319389 2008-10-23 05:53:16 1\ - 5572452688 116.257378 太平洋影城(中关村店) - 4 39.984217 116.319422 2008-10-23 05:53:21 1\ - 5572452688 114.886759 太平洋影城(中关村店) + >>> move_df + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 + 3 40.016238 116.307691 2008-10-23 11:03:06 1 + 4 40.013814 116.306525 2008-10-23 11:58:33 2 + 5 40.009735 116.315069 2008-10-23 23:50:45 2 + >>> pois + lat lon id type_poi name_poi + 0 39.984094 116.319236 1 policia distrito_pol_1 + 1 39.991013 116.326384 2 policia policia_federal + 2 40.010000 116.312615 3 comercio supermercado_aroldo + >>> join_with_pois(move_df, pois) + >>> move_df + lat lon datetime id id_poi dist_poi name_poi + 0 39.984094 116.319236 2008-10-23 05:53:05 1 1 0.000000 distrito_pol_1 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 1 637.690216 distrito_pol_1 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 3 1094.860663 supermercado_aroldo + 3 40.016238 116.307691 2008-10-23 11:03:06 1 
3 810.542998 supermercado_aroldo + 4 40.013814 116.306525 2008-10-23 11:58:33 2 3 669.973155 supermercado_aroldo + 5 40.009735 116.315069 2008-10-23 23:50:45 2 3 211.069129 supermercado_aroldo """ values = _reset_and_creates_id_and_lat_lon(data, df_pois, True, reset_index) current_distances, ids_pois, tag_pois, lat_user, lon_user = values @@ -820,36 +806,6 @@ def join_with_pois_optimizer( reset_index : bool, optional Flag for reset index of the df_pois and data dataframes before the join, by default True - - Examples - -------- - >>> from pymove.utils.integration import join_with_pois_optimizer - >>> from pymove.utils.integration import join_with_pois - >>> POIs.head() - unique_id osmid element_type amenity fee\ - geometry - 0 node/269492188 269492188 node toilets no\ - POINT (116.26750 39.98087) - 1 node/274942287 274942287 node toilets NaN\ - POINT (116.27358 39.99664) - 2 node/276320137 276320137 node fast_food NaN\ - POINT (116.33756 39.97541) - 3 node/276320142 276320142 node massage NaN\ - POINT (116.33751 39.97546) - 4 node/286242547 286242547 node toilets NaN\ - POINT (116.19982 40.00670) - .... - >>> move_df.head() - lat lon datetime id - 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984198 116.319322 2008-10-23 05:53:06 1 - 2 39.984224 116.319402 2008-10-23 05:53:11 1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 - ... 
- >>> join_with_pois_optimizer(move_df, POIs, label_id='osmid', - label_poi_name='name', dist_poi=np.array([100,9,1,50,50,10,20])) - 'the size of the dist_poi is different from the size of pois' """ if dist_poi is None: dist_poi = [] @@ -937,43 +893,27 @@ def join_with_pois_by_category( Examples -------- >>> from pymove.utils.integration import join_with_pois_by_category - >>> POIs.head() - unique_id osmid element_type amenity fee/ - geometry - 0 node/269492188 269492188 node toilets no/ - POINT (116.26750 39.98087) - 1 node/274942287 274942287 node toilets NaN/ - POINT (116.27358 39.99664) - 2 node/276320137 276320137 node fast_food NaN/ - POINT (116.33756 39.97541) - 3 node/276320142 276320142 node massage NaN/ - POINT (116.33751 39.97546) - 4 node/286242547 286242547 node toilets NaN/ - POINT (116.19982 40.00670) - .... - >>> move_df.head() - lat lon datetime id - 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984198 116.319322 2008-10-23 05:53:06 1 - 2 39.984224 116.319402 2008-10-23 05:53:11 1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 - ... - >>> join_with_pois_by_category(move_df, POIs, - label_category='amenity', label_id='osmid') - >>> move_df.head() - lat lon datetime id/ - id_toilets dist_toilets id_fast_food ... - 0 39.984094 116.319236 2008-10-23 05:53:05 1/ - 274942287 4132.229067 276320137 ... - 1 39.984198 116.319322 2008-10-23 05:53:06 1/ - 274942287 4135.240296 276320137 ... - 2 39.984224 116.319402 2008-10-23 05:53:11 1/ - 274942287 4140.698090 276320137 ... - 3 39.984211 116.319389 2008-10-23 05:53:16 1/ - 274942287 4140.136625 276320137 ... - 4 39.984217 116.319422 2008-10-23 05:53:21 1/ - 274942287 4142.564150 276320137 ... 
+ >>> move_df + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 + 3 40.016238 116.307691 2008-10-23 11:03:06 1 + 4 40.013814 116.306525 2008-10-23 11:58:33 2 + 5 40.009735 116.315069 2008-10-23 23:50:45 2 + >>> pois + lat lon id type_poi name_poi + 0 39.984094 116.319236 1 policia distrito_pol_1 + 1 39.991013 116.326384 2 policia policia_federal + 2 40.010000 116.312615 3 comercio supermercado_aroldo + >>> join_with_pois_by_category(move_df, pois) + lat lon datetime id id_policia dist_policia id_comercio dist_comercio + 0 39.984094 116.319236 2008-10-23 05:53:05 1 1 0.000000 3 2935.310277 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 1 637.690216 3 3072.696379 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 2 1385.087181 3 1094.860663 + 3 40.016238 116.307691 2008-10-23 11:03:06 1 2 3225.288831 3 810.542998 + 4 40.013814 116.306525 2008-10-23 11:58:33 2 2 3047.838222 3 669.973155 + 5 40.009735 116.315069 2008-10-23 23:50:45 2 2 2294.075820 3 211.069129 """ logger.debug('Integration with POIs...') @@ -1061,41 +1001,28 @@ def join_with_poi_datetime( Examples -------- >>> from pymove.utils.integration import join_with_poi_datetime - >>> POIs_events - unique_id osmid element_type amenity\ - fee - 0 node/269492188 269492188 node toilets\ - no... - 1 node/931686797 931686797 node post_office\ - NaN... - 2 node/992592626 992592626 node parking\ - NaN... - 3 node/1423043074 1423043074 node car_wash\ - NaN... - 4 node/1803755348 1803755348 node telephone\ - NaN... 
- >>> move_df.head() - lat lon datetime id - 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984198 116.319322 2008-10-23 05:53:06 1 - 2 39.984224 116.319402 2008-10-23 05:53:11 1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> join_with_poi_datetime(df_7, POIs_events, label_date='datetime', - time_window=900, label_event_id='osmid', label_event_type='amenity') - >>> move_df.head() - lat lon datetime id\ - osmid dist_event amenity - 0 39.984094 116.319236 2008-10-23 05:53:05 1\ - 269492188 4422.237186 toilets - 1 39.984198 116.319322 2008-10-23 05:53:06 1\ - 269492188 4430.488277 toilets - 2 39.984224 116.319402 2008-10-23 05:53:11 1\ - 269492188 4437.521909 toilets - 3 39.984211 116.319389 2008-10-23 05:53:16 1\ - 269492188 4436.297310 toilets - 4 39.984217 116.319422 2008-10-23 05:53:21 1\ - 269492188 4439.154806 toilets + >>> move_df + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 + 3 40.016238 116.307691 2008-10-23 11:03:06 1 + 4 40.013814 116.306525 2008-10-23 11:58:33 2 + 5 40.009735 116.315069 2008-10-23 23:50:45 2 + >>> pois + lat lon event_id datetime event_type + 0 39.984094 116.319236 1 2008-10-24 01:57:57 show do tropykalia + 1 39.991013 116.326384 2 2008-10-24 00:22:01 evento da prefeitura + 2 40.010000 116.312615 3 2008-10-25 00:21:01 show do seu joao + >>> join_with_poi_datetime(move_df, pois) + >>> move_df + lat lon datetime id event_id dist_event event_type + 0 39.984094 116.319236 2008-10-23 05:53:05 1 inf + 1 39.984559 116.326696 2008-10-23 10:37:26 1 inf + 2 40.002899 116.321520 2008-10-23 10:50:16 1 inf + 3 40.016238 116.307691 2008-10-23 11:03:06 1 inf + 4 40.013814 116.306525 2008-10-23 11:58:33 2 inf + 5 40.009735 116.315069 2008-10-23 23:50:45 2 inf Raises ------ @@ -1182,45 +1109,7 @@ def join_with_poi_datetime_optimizer( Label of df_events referring to the id of the 
event, by default EVENT_ID label_event_type : str, optional Label of df_events referring to the type of the event, by default EVENT_TYPE - - Examples - -------- - >>> from pymove.utils.integration import join_with_poi_datetime_optimizer - >>> POIs_events - unique_id event_id element_type event_type\ - fee - 0 node/269492188 269492188 node toilets\ - no... - 1 node/931686797 931686797 node post_office\ - NaN... - 2 node/992592626 992592626 node parking\ - NaN... - 3 node/1423043074 1423043074 node car_wash\ - NaN... - 4 node/1803755348 1803755348 node telephone\ - NaN... - >>> move_df.head() - lat lon datetime id - 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984198 116.319322 2008-10-23 05:53:06 1 - 2 39.984224 116.319402 2008-10-23 05:53:11 1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> join_with_poi_datetime_optimizer(df_8, POIs_events) - >>> move_df.head() - lat lon datetime id\ - event_id dist_event event_type - 0 39.984094 116.319236 2008-10-23 05:53:05 1\ - 269492188 4422.237186 toilets - 1 39.984198 116.319322 2008-10-23 05:53:06 1\ - 269492188 4430.488277 toilets - 2 39.984224 116.319402 2008-10-23 05:53:11 1\ - 269492188 4437.521909 toilets - 3 39.984211 116.319389 2008-10-23 05:53:16 1\ - 269492188 4436.297310 toilets - 4 39.984217 116.319422 2008-10-23 05:53:21 1\ - 269492188 4439.154806 toilets - + Raises ------ ValueError @@ -1333,41 +1222,29 @@ def join_with_pois_by_dist_and_datetime( Examples -------- >>> from pymove.utils.integration import join_with_pois_by_dist_and_datetime - >>> POIs_events - unique_id event_id element_type event_type\ - fee - 0 node/269492188 269492188 node toilets\ - no... - 1 node/931686797 931686797 node post_office\ - NaN... - 2 node/992592626 992592626 node parking\ - NaN... - 3 node/1423043074 1423043074 node car_wash\ - NaN... - 4 node/1803755348 1803755348 node telephone\ - NaN... 
- >>> move_df.head() - lat lon datetime id - 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984198 116.319322 2008-10-23 05:53:06 1 - 2 39.984224 116.319402 2008-10-23 05:53:11 1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> join_with_poi_datetime_optimizer(df_8, POIs_events) - >>> move_df.head() - lat lon datetime id\ - event_id dist_event event_type - 0 39.984094 116.319236 2008-10-23 05:53:05 1\ - None None None - 1 39.984198 116.319322 2008-10-23 05:53:06 1\ - None None None - 2 39.984224 116.319402 2008-10-23 05:53:11 1\ - None None None - 3 39.984211 116.319389 2008-10-23 05:53:16 1\ - None None None - 4 39.984217 116.319422 2008-10-23 05:53:21 1\ - None None None - + >>> move_df + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 + 3 40.016238 116.307691 2008-10-23 11:03:06 1 + 4 40.013814 116.306525 2008-10-23 11:58:33 2 + 5 40.009735 116.315069 2008-10-23 23:50:45 2 + >>> pois + lat lon event_id datetime event_type + 0 39.984094 116.319236 1 2008-10-24 01:57:57 show do tropykalia + 1 39.991013 116.326384 2 2008-10-24 00:22:01 evento da prefeitura + 2 40.010000 116.312615 3 2008-10-25 00:21:01 show do seu joao + >>> join_with_pois_by_dist_and_datetime(move_df, pois) + >>> move_df + lat lon datetime id event_id dist_event event_type + 0 39.984094 116.319236 2008-10-23 05:53:05 1 None None None + 1 39.984559 116.326696 2008-10-23 10:37:26 1 None None None + 2 40.002899 116.321520 2008-10-23 10:50:16 1 None None None + 3 40.016238 116.307691 2008-10-23 11:03:06 1 None None None + 4 40.013814 116.306525 2008-10-23 11:58:33 2 None None None + 5 40.009735 116.315069 2008-10-23 23:50:45 2 None None None + Raises ------ ValueError @@ -1472,31 +1349,27 @@ def join_with_home_by_id( Examples -------- >>> from pymove.utils.integration import join_with_home_by_id - >>> move_df.head() - lat lon datetime 
id - 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984198 116.319322 2008-10-23 05:53:06 1 - 2 39.984224 116.319402 2008-10-23 05:53:11 1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> home_df.head() - lat lon datetime id formatted_address city - 300 39.991574 116.326394 2008-10-23 10:42:03 1 Rua1, n02 ChinaTown - 301 39.991652 116.326414 2008-10-23 10:42:08 1 Rua2, n03 ChinaTown - >>> join_with_home_by_id(move_df, home_df, label_id='id') - >>> move_df.head() - lat lon datetime id\ - dist_home home city - 0 39.984094 116.319236 2008-10-23 05:53:05 1\ - 1031.348370 Rua1, n02 ChinaTown - 1 39.984198 116.319322 2008-10-23 05:53:06 1\ - 1017.690147 Rua1, n02 ChinaTown - 2 39.984224 116.319402 2008-10-23 05:53:11 1\ - 1011.332141 Rua1, n02 ChinaTown - 3 39.984211 116.319389 2008-10-23 05:53:16 1\ - 1013.152700 Rua1, n02 ChinaTown - 4 39.984217 116.319422 2008-10-23 05:53:21 1\ - 1010.959220 Rua1, n02 ChinaTown + >>> move_df + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 + 3 40.016238 116.307691 2008-10-23 11:03:06 1 + 4 40.013814 116.306525 2008-10-23 11:58:33 2 + 5 40.009735 116.315069 2008-10-23 23:50:45 2 + >>> home_df + lat lon id formatted_address city + 0 39.984094 116.319236 1 rua da mae quixiling + 1 40.013821 116.306531 2 rua da familia quixeramoling + >>> join_with_home_by_id(move_df, home_df) + >>> move_df + id lat lon datetime dist_home home city + 0 1 39.984094 116.319236 2008-10-23 05:53:05 0.000000 rua da mae quixiling + 1 1 39.984559 116.326696 2008-10-23 10:37:26 637.690216 rua da mae quixiling + 2 1 40.002899 116.321520 2008-10-23 10:50:16 2100.053501 rua da mae quixiling + 3 1 40.016238 116.307691 2008-10-23 11:03:06 3707.066732 rua da mae quixiling + 4 2 40.013814 116.306525 2008-10-23 11:58:33 0.931101 rua da familia quixeramoling + 5 2 40.009735 116.315069 2008-10-23 23:50:45 
857.417540 rua da familia quixeramoling """ ids_without_home = [] @@ -1585,48 +1458,66 @@ def merge_home_with_poi( Examples -------- - >>> from pymove.utils.integration import merge_home_with_poi - >>> move_df.head() - lat lon datetime id\ - dist_home home city - 0 39.984094 116.319236 2008-10-23 05:53:05 1\ - 1031.348370 Rua1, n02 ChinaTown - 1 39.984198 116.319322 2008-10-23 05:53:06 1\ - 1017.690147 Rua1, n02 ChinaTown - 2 39.984224 116.319402 2008-10-23 05:53:11 1\ - 1011.332141 Rua1, n02 ChinaTown - 3 39.984211 116.319389 2008-10-23 05:53:16 1\ - 1013.152700 Rua1, n02 ChinaTown - 4 39.984217 116.319422 2008-10-23 05:53:21 1\ - 1010.959220 Rua1, n02 ChinaTown - >>> POIs.head() - unique_id osmid element_type amenity fee\ - geometry - 0 node/269492188 269492188 node toilets no\ - POINT (116.26750 39.98087) - 1 node/274942287 274942287 node toilets NaN\ - POINT (116.27358 39.99664) - 2 node/276320137 276320137 node fast_food NaN\ - POINT (116.33756 39.97541) - 3 node/276320142 276320142 node massage NaN\ - POINT (116.33751 39.97546) - 4 node/286242547 286242547 node toilets NaN\ - POINT (116.19982 40.00670) - .... 
- >>> join_with_pois(move_df, POIs, label_id='osmid', label_poi_name='name') - >>> move_df.head() - id lat lon datetime\ - city id_poi dist_poi name_poi - 0 1 39.984094 116.319236 2008-10-23 05:53:05\ - ChinaTown 557245268 116.862844 太平洋影城(中关村店) - 1 1 39.984198 116.319322 2008-10-23 05:53:06\ - ChinaTown 5572452688 119.142692 太平洋影城(中关村店) - 2 1 39.984224 116.319402 2008-10-23 05:53:11\ - ChinaTown 5572452688 116.595117 太平洋影城(中关村店) - 3 1 39.984211 116.319389 2008-10-23 05:53:16\ - ChinaTown 5572452688 116.257378 太平洋影城(中关村店) - 4 1 39.984217 116.319422 2008-10-23 05:53:21\ - ChinaTown 5572452688 114.886759 太平洋影城(中关村店) + >>> from pymove.utils.integration import merge_home_with_poi, join_with_pois, + join_with_home_by_id + >>> move_df + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 + 3 40.016238 116.307691 2008-10-23 11:03:06 1 + 4 40.013814 116.306525 2008-10-23 11:58:33 2 + 5 40.009735 116.315069 2008-10-23 23:50:45 2 + >>> pois + lat lon id type_poi name_poi + 0 39.984094 116.319236 1 policia distrito_pol_1 + 1 39.991013 116.326384 2 policia policia_federal + >>> join_with_pois(move_df, pois) + >>> move_df + lat lon datetime id id_poi dist_poi name_poi + 0 39.984094 116.319236 2008-10-23 05:53:05 1 1 0.000000 distrito_pol_1 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 1 637.690216 distrito_pol_1 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 2 1385.087181 policia_federal + 3 40.016238 116.307691 2008-10-23 11:03:06 1 2 3225.288831 policia_federal + 4 40.013814 116.306525 2008-10-23 11:58:33 2 2 3047.838222 policia_federal + 5 40.009735 116.315069 2008-10-23 23:50:45 2 2 2294.075820 policia_federal + >>> home_df + lat lon id formatted_address city + 0 39.984094 116.319236 1 rua da mae quixiling + 1 40.013821 116.306531 2 rua da familia quixeramoling + >>> join_with_home_by_id(move, home_df) + >>> move_df + id lat lon datetime id_poi dist_poi\ + name_poi 
dist_home home city + 0 1 39.984094 116.319236 2008-10-23 05:53:05 1 0.000000\ + distrito_pol_1 0.000000 rua da mae quixiling + 1 1 39.984559 116.326696 2008-10-23 10:37:26 1 637.690216\ + distrito_pol_1 637.690216 rua da mae quixiling + 2 1 40.002899 116.321520 2008-10-23 10:50:16 2 1385.087181\ + policia_federal 2100.053501 rua da mae quixiling + 3 1 40.016238 16.307691 2008-10-23 11:03:06 2 3225.288831\ + policia_federal 3707.066732 rua da mae quixiling + 4 2 40.013814 116.306525 2008-10-23 11:58:33 2 3047.838222\ + policia_federal 0.931101 rua da familia quixeramoling + 5 2 40.009735 116.315069 2008-10-23 23:50:45 2 2294.075820\ + policia_federal 857.417540 rua da familia quixeramoling + >>> merge_home_with_poi(move_df) # MAIN FUNCTION + id lat lon datetime id_poi\ + dist_poi name_poi city + 0 1 39.984094 116.319236 2008-10-23 05:53:05 rua da mae\ + 0.000000 home quixiling + 1 1 39.984559 116.326696 2008-10-23 10:37:26 rua da mae\ + 637.690216 home quixiling + 2 1 40.002899 116.321520 2008-10-23 10:50:16 2\ + 1385.087181 policia_federal quixiling + 3 1 40.016238 116.307691 2008-10-23 11:03:06 2\ + 3225.288831 policia_federal quixiling + 4 2 40.013814 116.306525 2008-10-23 11:58:33 rua da familia\ + 0.931101 home quixeramoling + 5 2 40.009735 116.315069 2008-10-23 23:50:45 rua da familia\ + 857.417540 home quixeramoling + + """ logger.debug('merge home with POI using shortest distance') idx = data[data[label_dist_home] <= data[label_dist_poi]].index From 038951c410dd85922da7490c24fc4d45301faa2e Mon Sep 17 00:00:00 2001 From: flych3r Date: Sat, 26 Jun 2021 09:43:30 -0300 Subject: [PATCH 35/56] fix optimizer --- pymove/tests/test_utils_integration.py | 94 +++++++-------------- pymove/utils/integration.py | 109 ++++++++++++------------- 2 files changed, 83 insertions(+), 120 deletions(-) diff --git a/pymove/tests/test_utils_integration.py b/pymove/tests/test_utils_integration.py index d46d4364..38e245f0 100644 --- a/pymove/tests/test_utils_integration.py +++ 
b/pymove/tests/test_utils_integration.py @@ -488,76 +488,42 @@ def test_join_with_pois(): integration.join_with_pois(move_df, pois) assert_frame_equal(move_df, expected, check_dtype=False) - move_df = MoveDataFrame(list_move) - integration.join_with_pois(move_df, pois) - assert_frame_equal(move_df, expected, check_dtype=False) - def test_join_with_pois_optimizer(): move_df = MoveDataFrame(list_move) + pois = DataFrame( data=list_pois, columns=[LATITUDE, LONGITUDE, TRAJ_ID, TYPE_POI, NAME_POI], index=[0, 1, 2, 3, 4, 5, 6] ) - expected = DataFrame( - data=[ - [39.984094, 116.319236, Timestamp('2008-10-23 05:53:05'), 1, 1, - 0.0, 'policia'], - [39.984559000000004, 116.326696, Timestamp('2008-10-23 10:37:26'), - 1, 1, 128.24869775642176, 'policia'], - [40.002899, 116.32151999999999, Timestamp('2008-10-23 10:50:16'), - 1, 1, 663.0104596559174, 'policia'], - [40.016238, 116.30769099999999, Timestamp('2008-10-23 11:03:06'), - 1, 1, 286.3387434682031, 'policia'], - [40.013814, 116.306525, Timestamp('2008-10-23 11:58:33'), 2, 1, - 0.9311014399622559, 'policia'], - [40.009735, 116.315069, Timestamp('2008-10-23 23:50:45'), 2, 1, - 211.06912863495492, 'policia'], - [39.993527, 116.32648300000001, Timestamp('2008-10-24 00:02:14'), - 2, 1, 279.6712398549538, 'policia'], - [39.978575, 116.326975, Timestamp('2008-10-24 00:22:01'), 3, 1, - 792.7526066105717, 'policia'], - [39.981668, 116.310769, Timestamp('2008-10-24 01:57:57'), 3, 1, - 270.7018856738821, 'policia'] - ], - columns=[LATITUDE, LONGITUDE, DATETIME, TRAJ_ID, ID_POI, DIST_POI, NAME_POI], - index=[0, 1, 2, 3, 4, 5, 6, 7, 8] - ) - expected = MoveDataFrame(expected) - integration.join_with_pois_optimizer( - move_df, pois, dist_poi=[100, 50, 100, 50, 100, 200, 1000] - ) - assert_frame_equal(move_df, expected, check_dtype=False) - move_df = MoveDataFrame(list_move) - integration.join_with_pois_optimizer( - move_df, pois, dist_poi=[100, 50, 100, 50, 100, 200, 1000], reset_index=False - ) expected = DataFrame( data=[ 
[39.984094, 116.319236, Timestamp('2008-10-23 05:53:05'), 1, 1, - 0.0, 'policia'], + 0.0, 'distrito_pol_1'], [39.984559000000004, 116.326696, Timestamp('2008-10-23 10:37:26'), - 1, 1, 128.24869775642176, 'policia'], + 1, 6, 128.24869775642176, 'adocao_de_animais'], [40.002899, 116.32151999999999, Timestamp('2008-10-23 10:50:16'), - 1, 1, 663.0104596559174, 'policia'], + 1, 5, 663.0104596559174, 'rinha_de_galo_world_cup'], [40.016238, 116.30769099999999, Timestamp('2008-10-23 11:03:06'), - 1, 1, 286.3387434682031, 'policia'], - [40.013814, 116.306525, Timestamp('2008-10-23 11:58:33'), 2, 1, - 0.9311014399622559, 'policia'], - [40.009735, 116.315069, Timestamp('2008-10-23 23:50:45'), 2, 1, - 211.06912863495492, 'policia'], + 1, 4, 286.3387434682031, 'forro_tropykalia'], + [40.013814, 116.306525, Timestamp('2008-10-23 11:58:33'), 2, 4, + 0.9311014399622559, 'forro_tropykalia'], + [40.009735, 116.315069, Timestamp('2008-10-23 23:50:45'), 2, 3, + 211.06912863495492, 'supermercado_aroldo'], [39.993527, 116.32648300000001, Timestamp('2008-10-24 00:02:14'), - 2, 1, 279.6712398549538, 'policia'], - [39.978575, 116.326975, Timestamp('2008-10-24 00:22:01'), 3, 1, - 792.7526066105717, 'policia'], - [39.981668, 116.310769, Timestamp('2008-10-24 01:57:57'), 3, 1, - 270.7018856738821, 'policia'] + 2, 2, 279.6712398549538, 'policia_federal'], + [39.978575, 116.326975, Timestamp('2008-10-24 00:22:01'), 3, 6, + 792.7526066105717, 'adocao_de_animais'], + [39.981668, 116.310769, Timestamp('2008-10-24 01:57:57'), 3, 7, + 270.7018856738821, 'dia_do_municipio'] ], columns=[LATITUDE, LONGITUDE, DATETIME, TRAJ_ID, ID_POI, DIST_POI, NAME_POI], index=[0, 1, 2, 3, 4, 5, 6, 7, 8] ) + + integration.join_with_pois_optimizer(move_df, pois) assert_frame_equal(move_df, expected, check_dtype=False) @@ -677,24 +643,24 @@ def test_join_with_poi_datetime_optimizer(): ) expected = DataFrame( data=[ - [39.984094, 116.319236, Timestamp('2008-10-23 05:53:05'), 1, 1, - 0.0, 'show do tropykalia'], + 
[39.984094, 116.319236, Timestamp('2008-10-23 05:53:05'), 1, + '', inf, ''], [39.984559000000004, 116.326696, Timestamp('2008-10-23 10:37:26'), - 1, 1, 637.6902157810676, 'show do tropykalia'], + 1, '', inf, ''], [40.002899, 116.32151999999999, Timestamp('2008-10-23 10:50:16'), - 1, 1, 1094.8606633486436, 'show do tropykalia'], + 1, '', inf, ''], [40.016238, 116.30769099999999, Timestamp('2008-10-23 11:03:06'), - 1, 1, 286.3387434682031, 'show do tropykalia'], - [40.013814, 116.306525, Timestamp('2008-10-23 11:58:33'), 2, 1, - 0.9311014399622559, 'show do tropykalia'], - [40.009735, 116.315069, Timestamp('2008-10-23 23:50:45'), 2, - '', inf, ''], + 1, '', inf, ''], + [40.013814, 116.306525, Timestamp('2008-10-23 11:58:33'), 2, 2, + 3047.8382223981853, 'evento da prefeitura'], + [40.009735, 116.315069, Timestamp('2008-10-23 23:50:45'), 2, 2, + 2294.0758201547073, 'evento da prefeitura'], [39.993527, 116.32648300000001, Timestamp('2008-10-24 00:02:14'), - 2, '', inf, ''], - [39.978575, 116.326975, Timestamp('2008-10-24 00:22:01'), 3, - '', inf, ''], - [39.981668, 116.310769, Timestamp('2008-10-24 01:57:57'), 3, - '', inf, ''] + 2, 2, 279.6712398549538, 'evento da prefeitura'], + [39.978575, 116.326975, Timestamp('2008-10-24 00:22:01'), 3, 1, + 900.7798955139455, 'show do tropykalia'], + [39.981668, 116.310769, Timestamp('2008-10-24 01:57:57'), 3, 1, + 770.188754517813, 'show do tropykalia'] ], columns=[ LATITUDE, LONGITUDE, DATETIME, TRAJ_ID, EVENT_ID, DIST_EVENT, EVENT_TYPE diff --git a/pymove/utils/integration.py b/pymove/utils/integration.py index 9127510d..413e1ca6 100644 --- a/pymove/utils/integration.py +++ b/pymove/utils/integration.py @@ -853,61 +853,55 @@ def join_with_pois_optimizer( """ if dist_poi is None: dist_poi = [] - if len(df_pois[label_poi_name].unique()) == len(dist_poi): - values = _reset_and_creates_id_and_lat_lon(data, df_pois, False, reset_index) - minimum_distances, ids_pois, tag_pois, lat_poi, lon_poi = values - df_pois.rename( - 
columns={label_id: TRAJ_ID, label_poi_name: NAME_POI}, - inplace=True - ) + values = _reset_and_creates_id_and_lat_lon(data, df_pois, False, reset_index) + minimum_distances, ids_pois, tag_pois, lat_poi, lon_poi = values - for idx, row in progress_bar( - df_pois.iterrows(), total=len(df_pois), desc='Optimized integration with POIs' - ): - # update lat and lon of current index - lat_poi.fill(row[LATITUDE]) - lon_poi.fill(row[LONGITUDE]) + df_pois.rename( + columns={label_id: TRAJ_ID, label_poi_name: NAME_POI}, + inplace=True + ) - # First iteration is minimum distances - if idx == 0: - minimum_distances = np.array( - haversine( - lat_poi, - lon_poi, - data[LATITUDE].values, - data[LONGITUDE].values - ) - ) - ids_pois.fill(row.id) - tag_pois.fill(row.type_poi) - else: - # compute dist between a POI and ALL - logger.debug(data[LONGITUDE].values) - current_distances = np.float64( - haversine( - lat_poi, - lon_poi, - data[LATITUDE].values, - data[LONGITUDE].values - ) + for idx, row in progress_bar( + df_pois.iterrows(), total=len(df_pois), desc='Optimized integration with POIs' + ): + # update lat and lon of current index + lat_poi.fill(row[LATITUDE]) + lon_poi.fill(row[LONGITUDE]) + + # First iteration is minimum distances + if idx == 0: + minimum_distances = np.array( + haversine( + lat_poi, + lon_poi, + data[LATITUDE].values, + data[LONGITUDE].values ) - compare = current_distances < minimum_distances - index_true = np.where(compare is True)[0] - minimum_distances = np.minimum( - current_distances, minimum_distances, dtype=np.float64 + ) + ids_pois.fill(row[label_id]) + tag_pois.fill(row[label_poi_name]) + else: + # compute dist between a POI and ALL + current_distances = np.float64( + haversine( + lat_poi, + lon_poi, + data[LATITUDE].values, + data[LONGITUDE].values ) + ) + compare = current_distances < minimum_distances + minimum_distances = np.minimum( + current_distances, minimum_distances, dtype=np.float64 + ) + ids_pois[compare] = row[label_id] + 
tag_pois[compare] = row[label_poi_name] - if index_true.shape[0] > 0: - ids_pois[index_true] = row.id - tag_pois[index_true] = row.type_poi - - data[ID_POI] = ids_pois - data[DIST_POI] = minimum_distances - data[NAME_POI] = tag_pois - logger.debug('Integration with POI was finalized') - else: - logger.warning('the size of the dist_poi is different from the size of pois') + data[ID_POI] = ids_pois + data[DIST_POI] = minimum_distances + data[NAME_POI] = tag_pois + logger.debug('Integration with POI was finalized') def join_with_pois_by_category( @@ -1230,7 +1224,10 @@ def join_with_poi_datetime_optimizer( values = _reset_set_window__and_creates_event_id_type( data, df_events, time_window, label_date ) - window_starts, window_ends, current_distances, event_id, event_type = values + *_, current_distances, event_id, event_type = values + window_starts, window_ends, *_ = _reset_set_window__and_creates_event_id_type( + df_events, data, time_window, label_date + ) minimum_distances = np.full( data.shape[0], np.Infinity, dtype=np.float64 @@ -1248,7 +1245,6 @@ def join_with_poi_datetime_optimizer( df_filtered = filters.by_datetime( data, window_starts[idx], window_ends[idx] ) - if df_filtered is None: raise ValueError('Filtering datetime failed!') @@ -1271,8 +1267,8 @@ def join_with_poi_datetime_optimizer( df_filtered[LATITUDE].values, df_filtered[LONGITUDE].values, ) - event_id[indexes] = row.event_id - event_type[indexes] = row.event_type + event_id[indexes] = row[label_event_id] + event_type[indexes] = row[label_event_type] else: current_distances[indexes] = haversine( lat_event, @@ -1281,13 +1277,14 @@ def join_with_poi_datetime_optimizer( df_filtered[LONGITUDE].values, ) compare = current_distances < minimum_distances - index_true = np.where(compare is True)[0] minimum_distances = np.minimum( current_distances, minimum_distances ) - event_id[index_true] = row.event_id - event_type[index_true] = row.event_type + # compare = np.argmin(current_distances) + + 
event_id[compare] = row[label_event_id] + event_type[compare] = row[label_event_type] data[label_event_id] = event_id data[DIST_EVENT] = minimum_distances From c9269f07948e916853aaff5adda6b82d1bbfa129 Mon Sep 17 00:00:00 2001 From: flych3r Date: Sat, 26 Jun 2021 10:56:39 -0300 Subject: [PATCH 36/56] cleaned examples --- pymove/tests/test_utils_integration.py | 112 +----- pymove/utils/distances.py | 2 +- pymove/utils/geoutils.py | 2 +- pymove/utils/integration.py | 494 +++++++++++-------------- 4 files changed, 225 insertions(+), 385 deletions(-) diff --git a/pymove/tests/test_utils_integration.py b/pymove/tests/test_utils_integration.py index 38e245f0..aef45f15 100644 --- a/pymove/tests/test_utils_integration.py +++ b/pymove/tests/test_utils_integration.py @@ -485,45 +485,7 @@ def test_join_with_pois(): index=[0, 1, 2, 3, 4, 5, 6, 7, 8] ) - integration.join_with_pois(move_df, pois) - assert_frame_equal(move_df, expected, check_dtype=False) - - -def test_join_with_pois_optimizer(): - move_df = MoveDataFrame(list_move) - - pois = DataFrame( - data=list_pois, - columns=[LATITUDE, LONGITUDE, TRAJ_ID, TYPE_POI, NAME_POI], - index=[0, 1, 2, 3, 4, 5, 6] - ) - - expected = DataFrame( - data=[ - [39.984094, 116.319236, Timestamp('2008-10-23 05:53:05'), 1, 1, - 0.0, 'distrito_pol_1'], - [39.984559000000004, 116.326696, Timestamp('2008-10-23 10:37:26'), - 1, 6, 128.24869775642176, 'adocao_de_animais'], - [40.002899, 116.32151999999999, Timestamp('2008-10-23 10:50:16'), - 1, 5, 663.0104596559174, 'rinha_de_galo_world_cup'], - [40.016238, 116.30769099999999, Timestamp('2008-10-23 11:03:06'), - 1, 4, 286.3387434682031, 'forro_tropykalia'], - [40.013814, 116.306525, Timestamp('2008-10-23 11:58:33'), 2, 4, - 0.9311014399622559, 'forro_tropykalia'], - [40.009735, 116.315069, Timestamp('2008-10-23 23:50:45'), 2, 3, - 211.06912863495492, 'supermercado_aroldo'], - [39.993527, 116.32648300000001, Timestamp('2008-10-24 00:02:14'), - 2, 2, 279.6712398549538, 'policia_federal'], - 
[39.978575, 116.326975, Timestamp('2008-10-24 00:22:01'), 3, 6, - 792.7526066105717, 'adocao_de_animais'], - [39.981668, 116.310769, Timestamp('2008-10-24 01:57:57'), 3, 7, - 270.7018856738821, 'dia_do_municipio'] - ], - columns=[LATITUDE, LONGITUDE, DATETIME, TRAJ_ID, ID_POI, DIST_POI, NAME_POI], - index=[0, 1, 2, 3, 4, 5, 6, 7, 8] - ) - - integration.join_with_pois_optimizer(move_df, pois) + integration.join_with_pois(move_df, pois, inplace=True) assert_frame_equal(move_df, expected, check_dtype=False) @@ -572,11 +534,11 @@ def test_join_with_pois_by_category(): index=[0, 1, 2, 3, 4, 5, 6, 7, 8] ) - integration.join_with_pois_by_category(move_df, pois) + integration.join_with_pois_by_category(move_df, pois, inplace=True) assert_frame_equal(move_df, expected, check_dtype=False) -def test_join_with_poi_datetime(): +def test_join_with_events(): list_events = [ [39.984094, 116.319236, 1, Timestamp('2008-10-24 01:57:57'), 'show do tropykalia'], @@ -620,59 +582,11 @@ def test_join_with_poi_datetime(): index=[0, 1, 2, 3, 4, 5, 6, 7, 8] ) - integration.join_with_poi_datetime(move_df, pois, time_window=45000) + integration.join_with_events(move_df, pois, time_window=45000, inplace=True) assert_frame_equal(move_df, expected, check_dtype=False) -def test_join_with_poi_datetime_optimizer(): - list_events = [ - [39.984094, 116.319236, 1, - Timestamp('2008-10-24 01:57:57'), 'show do tropykalia'], - [39.991013, 116.326384, 2, - Timestamp('2008-10-24 00:22:01'), 'evento da prefeitura'], - [40.01, 116.312615, 3, - Timestamp('2008-10-25 00:21:01'), 'show do seu joao'], - [40.013821, 116.306531, 4, - Timestamp('2008-10-26 00:22:01'), 'missa'] - ] - move_df = MoveDataFrame(list_move) - pois = DataFrame( - data=list_events, - columns=[LATITUDE, LONGITUDE, EVENT_ID, DATETIME, EVENT_TYPE], - index=[0, 1, 2, 3] - ) - expected = DataFrame( - data=[ - [39.984094, 116.319236, Timestamp('2008-10-23 05:53:05'), 1, - '', inf, ''], - [39.984559000000004, 116.326696, Timestamp('2008-10-23 
10:37:26'), - 1, '', inf, ''], - [40.002899, 116.32151999999999, Timestamp('2008-10-23 10:50:16'), - 1, '', inf, ''], - [40.016238, 116.30769099999999, Timestamp('2008-10-23 11:03:06'), - 1, '', inf, ''], - [40.013814, 116.306525, Timestamp('2008-10-23 11:58:33'), 2, 2, - 3047.8382223981853, 'evento da prefeitura'], - [40.009735, 116.315069, Timestamp('2008-10-23 23:50:45'), 2, 2, - 2294.0758201547073, 'evento da prefeitura'], - [39.993527, 116.32648300000001, Timestamp('2008-10-24 00:02:14'), - 2, 2, 279.6712398549538, 'evento da prefeitura'], - [39.978575, 116.326975, Timestamp('2008-10-24 00:22:01'), 3, 1, - 900.7798955139455, 'show do tropykalia'], - [39.981668, 116.310769, Timestamp('2008-10-24 01:57:57'), 3, 1, - 770.188754517813, 'show do tropykalia'] - ], - columns=[ - LATITUDE, LONGITUDE, DATETIME, TRAJ_ID, EVENT_ID, DIST_EVENT, EVENT_TYPE - ], - index=[0, 1, 2, 3, 4, 5, 6, 7, 8] - ) - - integration.join_with_poi_datetime_optimizer(move_df, pois, time_window=45000) - assert_frame_equal(move_df, expected, check_dtype=False) - - -def test_join_with_pois_by_dist_and_datetime(): +def test_join_with_event_by_dist_and_time(): list_move = [ [39.984094, 116.319236, Timestamp('2008-10-23 05:53:05'), 1], [39.984559000000004, 116.326696, Timestamp('2008-10-23 10:37:26'), 1], @@ -737,8 +651,8 @@ def test_join_with_pois_by_dist_and_datetime(): index=[0, 1, 2, 3, 4, 5, 6, 7, 8] ) - integration.join_with_pois_by_dist_and_datetime( - move_df, pois, radius=3000, time_window=7200 + integration.join_with_event_by_dist_and_time( + move_df, pois, radius=3000, time_window=7200, inplace=True ) assert_frame_equal(move_df, expected, check_dtype=False) @@ -782,11 +696,13 @@ def test_join_with_home_by_id(): columns=[TRAJ_ID, LATITUDE, LONGITUDE, DATETIME, DIST_HOME, HOME, CITY] ) - integration.join_with_home_by_id(move_df, home_df) + integration.join_with_home_by_id(move_df, home_df, inplace=True) assert_frame_equal(move_df, expected, check_dtype=False) move_df = 
MoveDataFrame(list_move) - integration.join_with_home_by_id(move_df, home_df, drop_id_without_home=True) + integration.join_with_home_by_id( + move_df, home_df, drop_id_without_home=True, inplace=True + ) expected = DataFrame( data=[ [1, 39.984094, 116.319236, Timestamp('2008-10-23 05:53:05'), 0.0, @@ -829,7 +745,7 @@ def test_merge_home_with_poi(): columns=[LATITUDE, LONGITUDE, TRAJ_ID, TYPE_POI, NAME_POI], index=[0, 1, 2, 3, 4, 5, 6] ) - integration.join_with_pois(move_df, pois) + integration.join_with_pois(move_df, pois, inplace=True) list_home = [ [39.984094, 116.319236, 1, 'rua da mae', 'quixiling'], @@ -840,7 +756,7 @@ def test_merge_home_with_poi(): columns=[LATITUDE, LONGITUDE, TRAJ_ID, ADDRESS, CITY], index=[0, 1] ) - integration.join_with_home_by_id(move_df, home_df) + integration.join_with_home_by_id(move_df, home_df, inplace=True) expected = DataFrame( data=[ @@ -868,5 +784,5 @@ def test_merge_home_with_poi(): ], index=[0, 1, 2, 3, 4, 5, 6] ) - integration.merge_home_with_poi(move_df) + integration.merge_home_with_poi(move_df, inplace=True) assert_frame_equal(move_df, expected, check_dtype=False) diff --git a/pymove/utils/distances.py b/pymove/utils/distances.py index ca9ef88c..ce27dddc 100644 --- a/pymove/utils/distances.py +++ b/pymove/utils/distances.py @@ -31,7 +31,7 @@ def haversine( """ Calculates the great circle distance between two points on the earth. - (specified in decimal degrees or in radians). All (lat, lon) coordinates + Specified in decimal degrees or in radians. All (lat, lon) coordinates must have numeric dtypes and be of equal length. Result in meters. Use 3956 in earth radius for miles. diff --git a/pymove/utils/geoutils.py b/pymove/utils/geoutils.py index 0b414165..6137e6fd 100644 --- a/pymove/utils/geoutils.py +++ b/pymove/utils/geoutils.py @@ -298,7 +298,7 @@ def create_bin_geohash_df(data: DataFrame, precision: float = 15): 3 39.984211 116.319389 [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, ... 
4 39.984217 116.319422 [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, ... """ - _, _, _, bin_geohash = _reset_and_create_arrays_none(data) + *_, bin_geohash = _reset_and_create_arrays_none(data) for idx, row in progress_bar( data[[LATITUDE, LONGITUDE]].iterrows(), total=data.shape[0] diff --git a/pymove/utils/integration.py b/pymove/utils/integration.py index 387267a5..0cedb7ff 100644 --- a/pymove/utils/integration.py +++ b/pymove/utils/integration.py @@ -8,11 +8,9 @@ union_poi_police, join_collective_areas, join_with_pois, -join_with_pois_optimizer, join_with_pois_by_category, -join_with_poi_datetime, -join_with_poi_datetime_optimizer, -join_with_pois_by_dist_and_datetime, +join_with_events, +join_with_event_by_dist_and_time, join_with_home_by_id, merge_home_with_poi @@ -689,10 +687,11 @@ def join_with_pois( df_pois: DataFrame, label_id: Text = TRAJ_ID, label_poi_name: Text = NAME_POI, - reset_index: bool = True + reset_index: bool = True, + inplace: bool = False ): """ - Performs the integration between trajectories and points of interest. + Performs the integration between trajectories and the closest point of interest. 
Generating two new columns referring to the name and the distance from the point of interest closest @@ -711,6 +710,9 @@ def join_with_pois( reset_index : bool, optional Flag for reset index of the df_pois and data dataframes before the join, by default True + inplace : boolean, optional + if set to true the original dataframe will be altered to contain + the result of the filtering, otherwise a copy will be returned, by default False Examples -------- @@ -729,86 +731,24 @@ def join_with_pois( 1 39.991013 116.326384 2 policia policia_federal 2 40.010000 116.312615 3 comercio supermercado_aroldo >>> join_with_pois(move_df, pois) - >>> move_df - lat lon datetime id id_poi dist_poi name_poi - 0 39.984094 116.319236 2008-10-23 05:53:05 1 1 0.000000 distrito_pol_1 - 1 39.984559 116.326696 2008-10-23 10:37:26 1 1 637.690216 distrito_pol_1 - 2 40.002899 116.321520 2008-10-23 10:50:16 1 3 1094.860663 supermercado_aroldo - 3 40.016238 116.307691 2008-10-23 11:03:06 1 3 810.542998 supermercado_aroldo - 4 40.013814 116.306525 2008-10-23 11:58:33 2 3 669.973155 supermercado_aroldo - 5 40.009735 116.315069 2008-10-23 23:50:45 2 3 211.069129 supermercado_aroldo - """ - values = _reset_and_creates_id_and_lat_lon(data, df_pois, True, reset_index) - current_distances, ids_pois, tag_pois, lat_user, lon_user = values - - for idx, row in progress_bar( - data.iterrows(), total=len(data), desc='Integration with POIs' - ): - # create a vector to each lat - lat_user.fill(row[LATITUDE]) - lon_user.fill(row[LONGITUDE]) - - # computing distances to idx - distances = np.float64( - haversine( - lat_user, - lon_user, - df_pois[LATITUDE].values, - df_pois[LONGITUDE].values, - ) - ) - - # get index to arg_min and min distance - index_min = np.argmin(distances) - current_distances[idx] = np.min(distances) - - # setting data for a single object movement - ids_pois[idx] = df_pois.at[index_min, label_id] - tag_pois[idx] = df_pois.at[index_min, label_poi_name] - - data[ID_POI] = ids_pois - 
data[DIST_POI] = current_distances - data[NAME_POI] = tag_pois - - logger.debug('Integration with POI was finalized') - - -def join_with_pois_optimizer( - data, - df_pois: DataFrame, - label_id: Text = TRAJ_ID, - label_poi_name: Text = NAME_POI, - dist_poi: Optional[List] = None, - reset_index: bool = True -): + lat lon datetime id id_poi \ + dist_poi name_poi + 0 39.984094 116.319236 2008-10-23 05:53:05 1 1 \ + 0.000000 distrito_pol_1 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 1 \ + 637.690216 distrito_pol_1 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 3 \ + 1094.860663 supermercado_aroldo + 3 40.016238 116.307691 2008-10-23 11:03:06 1 3 \ + 810.542998 supermercado_aroldo + 4 40.013814 116.306525 2008-10-23 11:58:33 2 3 \ + 669.973155 supermercado_aroldo + 5 40.009735 116.315069 2008-10-23 23:50:45 2 3 \ + 211.069129 supermercado_aroldo """ - Performs the integration between trajectories and points of interest. - - Generating two new columns referring to the - name and distance from the nearest point of interest, - within the limit of distance determined by the parameter 'dist_poi', - of each point in the trajectory. - - Parameters - ---------- - data : DataFrame - The input trajectory data. - df_pois : DataFrame - The input point of interest data. 
- label_id : str, optional - Label of df_pois referring to the Point of Interest id, by default TRAJ_ID - label_poi_name : str, optional - Label of df_pois referring to the Point of Interest name, by default NAME_POI - dist_poi : list, optional - List containing the minimum distance limit between each type of - point of interest and each point of the trajectory to classify the - point of interest closest to each point of the trajectory, by default None - reset_index : bool, optional - Flag for reset index of the df_pois and data dataframes before the join, - by default True - """ - if dist_poi is None: - dist_poi = [] + if not inplace: + data = data.copy() + df_pois = df_pois.copy() values = _reset_and_creates_id_and_lat_lon(data, df_pois, False, reset_index) minimum_distances, ids_pois, tag_pois, lat_poi, lon_poi = values @@ -859,15 +799,19 @@ def join_with_pois_optimizer( data[NAME_POI] = tag_pois logger.debug('Integration with POI was finalized') + if not inplace: + return data + def join_with_pois_by_category( data: DataFrame, df_pois: DataFrame, label_category: Text = TYPE_POI, - label_id: Text = TRAJ_ID + label_id: Text = TRAJ_ID, + inplace: bool = False ): """ - Performs the integration between trajectories and points of interest. + Performs the integration between trajectories and each type of points of interest. 
Generating new columns referring to the category and distance from the nearest point of interest @@ -883,6 +827,9 @@ def join_with_pois_by_category( Label of df_pois referring to the point of interest category, by default TYPE_POI label_id : str, optional Label of df_pois referring to the point of interest id, by default TRAJ_ID + inplace : boolean, optional + if set to true the original dataframe will be altered to contain + the result of the filtering, otherwise a copy will be returned, by default False Examples -------- @@ -901,14 +848,25 @@ def join_with_pois_by_category( 1 39.991013 116.326384 2 policia policia_federal 2 40.010000 116.312615 3 comercio supermercado_aroldo >>> join_with_pois_by_category(move_df, pois) - lat lon datetime id id_policia dist_policia id_comercio dist_comercio - 0 39.984094 116.319236 2008-10-23 05:53:05 1 1 0.000000 3 2935.310277 - 1 39.984559 116.326696 2008-10-23 10:37:26 1 1 637.690216 3 3072.696379 - 2 40.002899 116.321520 2008-10-23 10:50:16 1 2 1385.087181 3 1094.860663 - 3 40.016238 116.307691 2008-10-23 11:03:06 1 2 3225.288831 3 810.542998 - 4 40.013814 116.306525 2008-10-23 11:58:33 2 2 3047.838222 3 669.973155 - 5 40.009735 116.315069 2008-10-23 23:50:45 2 2 2294.075820 3 211.069129 + lat lon datetime id \ + id_policia dist_policia id_comercio dist_comercio + 0 39.984094 116.319236 2008-10-23 05:53:05 1 \ + 1 0.000000 3 2935.310277 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 \ + 1 637.690216 3 3072.696379 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 \ + 2 1385.087181 3 1094.860663 + 3 40.016238 116.307691 2008-10-23 11:03:06 1 \ + 2 3225.288831 3 810.542998 + 4 40.013814 116.306525 2008-10-23 11:58:33 2 \ + 2 3047.838222 3 669.973155 + 5 40.009735 116.315069 2008-10-23 23:50:45 2 \ + 2 2294.075820 3 211.069129 """ + if not inplace: + data = data.copy() + df_pois = df_pois.copy() + logger.debug('Integration with POIs...') # get a vector with windows time to each point @@ -958,17 +916,21 @@ def 
join_with_pois_by_category( data['dist_%s' % c] = current_distances logger.debug('Integration with POI was finalized') + if not inplace: + return data + -def join_with_poi_datetime( +def join_with_events( data: DataFrame, df_events: DataFrame, label_date: Text = DATETIME, time_window: int = 900, label_event_id: Text = EVENT_ID, - label_event_type: Text = EVENT_TYPE + label_event_type: Text = EVENT_TYPE, + inplace: bool = False ): """ - Performs the integration between trajectories and points of interest. + Performs the integration between trajectories and the closest event in time window. Generating new columns referring to the category of the point of interest, the distance from the @@ -985,38 +947,44 @@ def join_with_poi_datetime( Label of data referring to the datetime of the input trajectory data, by default DATETIME time_window : float, optional - tolerable length of time range for assigning the event's + tolerable length of time range in `seconds` for assigning the event's point of interest to the trajectory point, by default 900 label_event_id : str, optional Label of df_events referring to the id of the event, by default EVENT_ID label_event_type : str, optional Label of df_events referring to the type of the event, by default EVENT_TYPE + inplace : boolean, optional + if set to true the original dataframe will be altered to contain + the result of the filtering, otherwise a copy will be returned, by default False Examples -------- - >>> from pymove.utils.integration import join_with_poi_datetime + >>> from pymove.utils.integration import join_with_events >>> move_df - lat lon datetime id - 0 39.984094 116.319236 2008-10-23 05:53:05 1 - 1 39.984559 116.326696 2008-10-23 10:37:26 1 - 2 40.002899 116.321520 2008-10-23 10:50:16 1 - 3 40.016238 116.307691 2008-10-23 11:03:06 1 - 4 40.013814 116.306525 2008-10-23 11:58:33 2 - 5 40.009735 116.315069 2008-10-23 23:50:45 2 - >>> pois - lat lon event_id datetime event_type - 0 39.984094 116.319236 1 2008-10-24 
01:57:57 show do tropykalia - 1 39.991013 116.326384 2 2008-10-24 00:22:01 evento da prefeitura - 2 40.010000 116.312615 3 2008-10-25 00:21:01 show do seu joao - >>> join_with_poi_datetime(move_df, pois) - >>> move_df - lat lon datetime id event_id dist_event event_type - 0 39.984094 116.319236 2008-10-23 05:53:05 1 inf - 1 39.984559 116.326696 2008-10-23 10:37:26 1 inf - 2 40.002899 116.321520 2008-10-23 10:50:16 1 inf - 3 40.016238 116.307691 2008-10-23 11:03:06 1 inf - 4 40.013814 116.306525 2008-10-23 11:58:33 2 inf - 5 40.009735 116.315069 2008-10-23 23:50:45 2 inf + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 + 2 39.993527 116.326483 2008-10-24 00:02:14 2 + 3 39.978575 116.326975 2008-10-24 00:22:01 3 + 4 39.981668 116.310769 2008-10-24 01:57:57 3 + >>> events + lat lon id datetime event_type event_id + 0 39.984094 116.319236 1 2008-10-23 05:53:05 show forro_tropykalia + 1 39.991013 116.326384 2 2008-10-23 10:37:26 show dia_do_municipio + 2 40.010000 116.312615 3 2008-10-24 01:57:57 feira adocao_de_animais + >>> join_with_events(move_df, events) + lat lon datetime id \ + event_type dist_event event_id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 \ + show 0.000000 forro_tropykalia + 1 39.984559 116.326696 2008-10-23 10:37:26 1 \ + show 718.144152 dia_do_municipio + 2 39.993527 116.326483 2008-10-24 00:02:14 2 \ + inf + 3 39.978575 116.326975 2008-10-24 00:22:01 3 \ + inf + 4 39.981668 116.310769 2008-10-24 01:57:57 3 \ + feira 3154.296880 adocao_de_animais Raises ------ @@ -1024,92 +992,10 @@ def join_with_poi_datetime( If feature generation fails """ - values = _reset_set_window__and_creates_event_id_type( - data, df_events, time_window, label_date - ) - window_starts, window_ends, current_distances, event_id, event_type = values - - for idx in progress_bar(data.index, total=len(data), desc='Integration with Events'): - # filter event by datetime - df_filtered = filters.by_datetime( - 
df_events, window_starts[idx], window_ends[idx] - ) - - if df_filtered is None: - raise ValueError('Filter datetime failed!') - - size_filter = df_filtered.shape[0] - - if size_filter > 0: - df_filtered.reset_index(drop=True, inplace=True) - lat_user = np.full( - size_filter, data.at[idx, LATITUDE], dtype=np.float64 - ) - lon_user = np.full( - size_filter, data.at[idx, LONGITUDE], dtype=np.float64 - ) - - # compute dist to poi filtered - distances = haversine( - lat_user, - lon_user, - df_filtered[LATITUDE].values, - df_filtered[LONGITUDE].values, - ) - # get index to arg_min - index_arg_min = np.argmin(distances) - # get min distances - min_distance = np.min(distances) - # store data - current_distances[idx] = min_distance - event_type[idx] = df_filtered.at[index_arg_min, label_event_type] - event_id[idx] = df_filtered.at[index_arg_min, label_event_id] - - data[label_event_id] = event_id - data[DIST_EVENT] = current_distances - data[label_event_type] = event_type - logger.debug('Integration with event was completed') - - -def join_with_poi_datetime_optimizer( - data: DataFrame, - df_events: DataFrame, - label_date: Text = DATETIME, - time_window: int = 900, - label_event_id: Text = EVENT_ID, - label_event_type: Text = EVENT_TYPE -): - """ - Performs a optimized integration between trajectories and points of events. - - Generating new columns referring to - the category of the event, the distance from the nearest - event and the time when the event happened at each point of - the trajectories. - - Parameters - ---------- - data : DataFrame - The input trajectory data. - df_events : DataFrame - The input events points of interest data. 
- label_date : str, optional - Label of data referring to the datetime of the input trajectory data, - by default DATETIME - time_window : float, optional - tolerable length of time range for assigning the event's - point of interest to the trajectory point, by default 900 - label_event_id : str, optional - Label of df_events referring to the id of the event, by default EVENT_ID - label_event_type : str, optional - Label of df_events referring to the type of the event, by default EVENT_TYPE - - Raises - ------ - ValueError - If feature generation fails + if not inplace: + data = data.copy() + df_events = df_events.copy() - """ values = _reset_set_window__and_creates_event_id_type( data, df_events, time_window, label_date ) @@ -1166,12 +1052,9 @@ def join_with_poi_datetime_optimizer( df_filtered[LONGITUDE].values, ) compare = current_distances < minimum_distances - minimum_distances = np.minimum( current_distances, minimum_distances ) - # compare = np.argmin(current_distances) - event_id[compare] = row[label_event_id] event_type[compare] = row[label_event_type] @@ -1180,18 +1063,22 @@ def join_with_poi_datetime_optimizer( data[label_event_type] = event_type logger.debug('Integration with events was completed') + if not inplace: + return data + -def join_with_pois_by_dist_and_datetime( +def join_with_event_by_dist_and_time( data: DataFrame, - df_pois: DataFrame, + df_events: DataFrame, label_date: Text = DATETIME, label_event_id: Text = EVENT_ID, label_event_type: Text = EVENT_TYPE, time_window: float = 3600, radius: float = 1000, + inplace: bool = False ): """ - Performs the integration between trajectories and points of interest. + Performs the integration between trajectories and events on windows. 
Generating new columns referring to the
     category of the point of interest, the distance between the
     location of the user and location of the poi
@@ -1211,48 +1098,60 @@ def join_with_pois_by_dist_and_datetime(
     label_event_type : str, optional
         Label of df_events referring to the type of the event, by default EVENT_TYPE
     time_window : float, optional
-        tolerable length of time range for assigning the event's
+        tolerable length of time range in `seconds` for assigning the event's
         point of interest to the trajectory point, by default 3600
     radius: float, optional
-        maximum radius of pois, by default 1000
+        maximum radius of pois in `meters`, by default 1000
+    inplace : boolean, optional
+        if set to true the original dataframe will be altered to contain
+        the result of the filtering, otherwise a copy will be returned, by default False

     Examples
     --------
     >>> from pymove.utils.integration import join_with_pois_by_dist_and_datetime
     >>> move_df
-              lat          lon            datetime  id
-    0   39.984094   116.319236 2008-10-23 05:53:05   1
-    1   39.984559   116.326696 2008-10-23 10:37:26   1
-    2   40.002899   116.321520 2008-10-23 10:50:16   1
-    3   40.016238   116.307691 2008-10-23 11:03:06   1
-    4   40.013814   116.306525 2008-10-23 11:58:33   2
-    5   40.009735   116.315069 2008-10-23 23:50:45   2
-    >>> pois
-              lat          lon   event_id            datetime      event_type
-    0   39.984094   116.319236          1 2008-10-24 01:57:57   show do tropykalia
-    1   39.991013   116.326384          2 2008-10-24 00:22:01   evento da prefeitura
-    2   40.010000   116.312615          3 2008-10-25 00:21:01   show do seu joao
+              lat          lon            datetime  id
+    0   39.984094   116.319236 2008-10-23 05:53:05   1
+    1   39.984559   116.326696 2008-10-23 10:37:26   1
+    2   39.993527   116.326483 2008-10-24 00:02:14   2
+    3   39.978575   116.326975 2008-10-24 00:22:01   3
+    4   39.981668   116.310769 2008-10-24 01:57:57   3
+    >>> events
+              lat          lon   id            datetime   type_poi   name_poi
+    0   39.984094   116.319236    1 2008-10-23 05:53:05       show   forro_tropykalia
+    1   39.991013   116.326384    2 2008-10-23 10:27:26    corrida   racha_de_jumento
+    2   39.990013   116.316384    2 2008-10-23 10:37:26 
show dia_do_municipio + 3 40.010000 116.312615 3 2008-10-24 01:57:57 feira adocao_de_animais >>> join_with_pois_by_dist_and_datetime(move_df, pois) >>> move_df - lat lon datetime id event_id dist_event event_type - 0 39.984094 116.319236 2008-10-23 05:53:05 1 None None None - 1 39.984559 116.326696 2008-10-23 10:37:26 1 None None None - 2 40.002899 116.321520 2008-10-23 10:50:16 1 None None None - 3 40.016238 116.307691 2008-10-23 11:03:06 1 None None None - 4 40.013814 116.306525 2008-10-23 11:58:33 2 None None None - 5 40.009735 116.315069 2008-10-23 23:50:45 2 None None None - + lat lon datetime id \ + type_poi dist_event name_poi + 0 39.984094 116.319236 2008-10-23 05:53:05 1 \ + [show] [0.0] [forro_tropykalia] + 1 39.984559 116.326696 2008-10-23 10:37:26 1 \ + [corrida, show] [718.144, 1067.53] [racha_de_jumento, dia_do_municipio] + 2 39.993527 116.326483 2008-10-24 00:02:14 2 \ + None None None + 3 39.978575 116.326975 2008-10-24 00:22:01 3 \ + None None None + 4 39.981668 116.310769 2008-10-24 01:57:57 3 \ + None None None + Raises ------ ValueError If feature generation fails """ - if label_date not in df_pois: + if label_date not in df_events: raise KeyError("POI's DataFrame must contain a %s column" % label_date) + if not inplace: + data = data.copy() + df_events = df_events.copy() + values = _reset_set_window_and_creates_event_id_type_all( - data, df_pois, time_window, label_date + data, df_events, time_window, label_date ) window_start, window_end, current_distances, event_id, event_type = values @@ -1267,7 +1166,7 @@ def join_with_pois_by_dist_and_datetime( # filter event by radius df_filtered = filters.by_bbox( - df_pois, bbox, inplace=False + df_events, bbox, inplace=False ) if df_filtered is None: @@ -1313,6 +1212,9 @@ def join_with_pois_by_dist_and_datetime( data[label_event_type] = event_type logger.debug('Integration with event was completed') + if not inplace: + return data + def join_with_home_by_id( data: DataFrame, @@ -1321,6 +1223,7 @@ def 
join_with_home_by_id( label_address: Text = ADDRESS, label_city: Text = CITY, drop_id_without_home: bool = False, + inplace: bool = False ): """ Performs the integration between trajectories and home points. @@ -1341,7 +1244,10 @@ def join_with_home_by_id( label_city : str, optional Label of df_home referring to the point city, by default CITY drop_id_without_home : bool, optional - flag as an option to drop id's that don't have houses, by default FALSE + flag as an option to drop id's that don't have houses, by default False + inplace : boolean, optional + if set to true the original dataframe will be altered to contain + the result of the filtering, otherwise a copy will be returned, by default False Examples -------- @@ -1360,14 +1266,25 @@ def join_with_home_by_id( 1 40.013821 116.306531 2 rua da familia quixeramoling >>> join_with_home_by_id(move_df, home_df) >>> move_df - id lat lon datetime dist_home home city - 0 1 39.984094 116.319236 2008-10-23 05:53:05 0.000000 rua da mae quixiling - 1 1 39.984559 116.326696 2008-10-23 10:37:26 637.690216 rua da mae quixiling - 2 1 40.002899 116.321520 2008-10-23 10:50:16 2100.053501 rua da mae quixiling - 3 1 40.016238 116.307691 2008-10-23 11:03:06 3707.066732 rua da mae quixiling - 4 2 40.013814 116.306525 2008-10-23 11:58:33 0.931101 rua da familia quixeramoling - 5 2 40.009735 116.315069 2008-10-23 23:50:45 857.417540 rua da familia quixeramoling + id lat lon datetime dist_home \ + home city + 0 1 39.984094 116.319236 2008-10-23 05:53:05 0.000000 \ + rua da mae quixiling + 1 1 39.984559 116.326696 2008-10-23 10:37:26 637.690216 \ + rua da mae quixiling + 2 1 40.002899 116.321520 2008-10-23 10:50:16 2100.053501 \ + rua da mae quixiling + 3 1 40.016238 116.307691 2008-10-23 11:03:06 3707.066732 \ + rua da mae quixiling + 4 2 40.013814 116.306525 2008-10-23 11:58:33 0.931101 \ + rua da familia quixeramoling + 5 2 40.009735 116.315069 2008-10-23 23:50:45 857.417540 \ + rua da familia quixeramoling """ + if not inplace: 
+        data = data.copy()
+        df_home = df_home.copy()
+
     ids_without_home = []
 
     if data.index.name is None:
@@ -1415,6 +1332,9 @@ def join_with_home_by_id(
     if drop_id_without_home:
         data.drop(data.loc[data[TRAJ_ID].isin(ids_without_home)].index, inplace=True)
 
+    if not inplace:
+        return data
+
 
 def merge_home_with_poi(
     data: DataFrame,
@@ -1424,6 +1344,7 @@ def merge_home_with_poi(
     label_home: Text = HOME,
     label_dist_home: Text = DIST_HOME,
     drop_columns: bool = True,
+    inplace: bool = False
 ):
     """
     Performs or merges the points of interest and the trajectories.
@@ -1451,71 +1372,71 @@ def merge_home_with_poi(
         by default DIST_HOME
     drop_columns : bool, optional
         Flag that controls the deletion of the columns referring to the
-        id and the distance from the home point, by default True
+        id and the distance from the home point, by default True
+    inplace : boolean, optional
+        if set to true the original dataframe will be altered to contain
+        the result of the filtering, otherwise a copy will be returned, by default False

     Examples
     --------
-    >>> from pymove.utils.integration import merge_home_with_poi, join_with_pois,
-    join_with_home_by_id
-    >>> move_df
-              lat          lon            datetime  id
-    0   39.984094   116.319236 2008-10-23 05:53:05   1
-    1   39.984559   116.326696 2008-10-23 10:37:26   1
-    2   40.002899   116.321520 2008-10-23 10:50:16   1
-    3   40.016238   116.307691 2008-10-23 11:03:06   1
-    4   40.013814   116.306525 2008-10-23 11:58:33   2
-    5   40.009735   116.315069 2008-10-23 23:50:45   2
-    >>> pois
-              lat          lon  id   type_poi   name_poi
-    0   39.984094   116.319236   1    policia   distrito_pol_1
-    1   39.991013   116.326384   2    policia   policia_federal
-    >>> join_with_pois(move_df, pois)
+    >>> from pymove.utils.integration import (
+    >>>    merge_home_with_poi,
+    >>>    join_with_home_by_id
+    >>> )
     >>> move_df
-              lat          lon            datetime  id  id_poi     dist_poi   name_poi
-    0   39.984094   116.319236 2008-10-23 05:53:05   1       1     0.000000   distrito_pol_1
-    1   39.984559   116.326696 2008-10-23 10:37:26   1       1   637.690216   distrito_pol_1
-    2   40.002899   116.321520 2008-10-23 10:50:16   1       2 
1385.087181 policia_federal - 3 40.016238 116.307691 2008-10-23 11:03:06 1 2 3225.288831 policia_federal - 4 40.013814 116.306525 2008-10-23 11:58:33 2 2 3047.838222 policia_federal - 5 40.009735 116.315069 2008-10-23 23:50:45 2 2 2294.075820 policia_federal + lat lon datetime id \ + id_poi dist_poi name_poi + 0 39.984094 116.319236 2008-10-23 05:53:05 1 \ + 1 0.000000 distrito_pol_1 + 1 39.984559 116.326696 2008-10-23 10:37:26 1 \ + 1 637.690216 distrito_pol_1 + 2 40.002899 116.321520 2008-10-23 10:50:16 1 \ + 2 1385.087181 policia_federal + 3 40.016238 116.307691 2008-10-23 11:03:06 1 \ + 2 3225.288831 policia_federal + 4 40.013814 116.306525 2008-10-23 11:58:33 2 \ + 2 3047.838222 policia_federal + 5 40.009735 116.315069 2008-10-23 23:50:45 2 \ + 2 2294.075820 policia_federal >>> home_df lat lon id formatted_address city 0 39.984094 116.319236 1 rua da mae quixiling 1 40.013821 116.306531 2 rua da familia quixeramoling - >>> join_with_home_by_id(move, home_df) + >>> join_with_home_by_id(move, home_df, inplace=True) >>> move_df - id lat lon datetime id_poi dist_poi\ + id lat lon datetime id_poi dist_poi \ name_poi dist_home home city - 0 1 39.984094 116.319236 2008-10-23 05:53:05 1 0.000000\ + 0 1 39.984094 116.319236 2008-10-23 05:53:05 1 0.000000 \ distrito_pol_1 0.000000 rua da mae quixiling - 1 1 39.984559 116.326696 2008-10-23 10:37:26 1 637.690216\ + 1 1 39.984559 116.326696 2008-10-23 10:37:26 1 637.690216 \ distrito_pol_1 637.690216 rua da mae quixiling - 2 1 40.002899 116.321520 2008-10-23 10:50:16 2 1385.087181\ + 2 1 40.002899 116.321520 2008-10-23 10:50:16 2 1385.087181 \ policia_federal 2100.053501 rua da mae quixiling - 3 1 40.016238 16.307691 2008-10-23 11:03:06 2 3225.288831\ + 3 1 40.016238 16.307691 2008-10-23 11:03:06 2 3225.288831 \ policia_federal 3707.066732 rua da mae quixiling - 4 2 40.013814 116.306525 2008-10-23 11:58:33 2 3047.838222\ + 4 2 40.013814 116.306525 2008-10-23 11:58:33 2 3047.838222 \ policia_federal 0.931101 rua da familia 
quixeramoling - 5 2 40.009735 116.315069 2008-10-23 23:50:45 2 2294.075820\ + 5 2 40.009735 116.315069 2008-10-23 23:50:45 2 2294.075820 \ policia_federal 857.417540 rua da familia quixeramoling - >>> merge_home_with_poi(move_df) # MAIN FUNCTION - id lat lon datetime id_poi\ + >>> merge_home_with_poi(move_df) + id lat lon datetime id_poi \ dist_poi name_poi city - 0 1 39.984094 116.319236 2008-10-23 05:53:05 rua da mae\ + 0 1 39.984094 116.319236 2008-10-23 05:53:05 rua da mae \ 0.000000 home quixiling - 1 1 39.984559 116.326696 2008-10-23 10:37:26 rua da mae\ + 1 1 39.984559 116.326696 2008-10-23 10:37:26 rua da mae \ 637.690216 home quixiling - 2 1 40.002899 116.321520 2008-10-23 10:50:16 2\ + 2 1 40.002899 116.321520 2008-10-23 10:50:16 2 \ 1385.087181 policia_federal quixiling - 3 1 40.016238 116.307691 2008-10-23 11:03:06 2\ + 3 1 40.016238 116.307691 2008-10-23 11:03:06 2 \ 3225.288831 policia_federal quixiling - 4 2 40.013814 116.306525 2008-10-23 11:58:33 rua da familia\ + 4 2 40.013814 116.306525 2008-10-23 11:58:33 rua da familia \ 0.931101 home quixeramoling - 5 2 40.009735 116.315069 2008-10-23 23:50:45 rua da familia\ + 5 2 40.009735 116.315069 2008-10-23 23:50:45 rua da familia \ 857.417540 home quixeramoling - - """ + if not inplace: + data = data.copy() + logger.debug('merge home with POI using shortest distance') idx = data[data[label_dist_home] <= data[label_dist_poi]].index @@ -1525,3 +1446,6 @@ def merge_home_with_poi( if(drop_columns): data.drop(columns=[label_dist_home, label_home], inplace=True) + + if not inplace: + return data From a699609da8e3a02010c15ae9f394bc90865741a9 Mon Sep 17 00:00:00 2001 From: flych3r Date: Sat, 26 Jun 2021 11:04:12 -0300 Subject: [PATCH 37/56] changed assert to assert_almost_equal due to precision --- pymove/tests/test_utils_conversions.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pymove/tests/test_utils_conversions.py b/pymove/tests/test_utils_conversions.py index 
46f315bd..7b00086a 100644 --- a/pymove/tests/test_utils_conversions.py +++ b/pymove/tests/test_utils_conversions.py @@ -1,4 +1,5 @@ from numpy import nan +from numpy.testing import assert_almost_equal from pandas import DataFrame, Timestamp from pandas.testing import assert_frame_equal from shapely.geometry import Point @@ -39,7 +40,7 @@ def test_lat_meters(): lat_in_meters = conversions.lat_meters(39.984094) - assert(lat_in_meters == expected) + assert_almost_equal(lat_in_meters, expected) def test_list_to_str(): @@ -63,28 +64,28 @@ def test_lon_to_x_spherical(): expected = -4285978.172767829 - assert(conversions.lon_to_x_spherical(-38.501597) == expected) + assert_almost_equal(conversions.lon_to_x_spherical(-38.501597), expected) def test_lat_to_y_spherical(): expected = -423086.2213610324 - assert(conversions.lat_to_y_spherical(-3.797864) == expected) + assert_almost_equal(conversions.lat_to_y_spherical(-3.797864), expected) def test_x_to_lon_spherical(): expected = -38.50159697513617 - assert(conversions.x_to_lon_spherical(-4285978.17) == expected) + assert_almost_equal(conversions.x_to_lon_spherical(-4285978.17), expected) def test_y_to_lat_spherical(): expected = -35.89350841198311 - assert(conversions.y_to_lat_spherical(-4285978.17) == expected) + assert_almost_equal(conversions.y_to_lat_spherical(-4285978.17), expected) def test_geometry_points_to_lat_and_lon(): From 02798310fdf8b7c80e7da53c2415801eb5d923a8 Mon Sep 17 00:00:00 2001 From: flych3r Date: Sat, 26 Jun 2021 11:12:28 -0300 Subject: [PATCH 38/56] fix list indentation --- .deployment-instructions.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.deployment-instructions.md b/.deployment-instructions.md index 7bb75745..695bec2c 100644 --- a/.deployment-instructions.md +++ b/.deployment-instructions.md @@ -13,10 +13,9 @@ The link in this tutorial will explain the steps to upload a package to pypi: :` 5. Now, the pull request will be checked. 
- - Comlete the checklist for the pull requests. + - Complete the checklist for the pull requests. - The recipe meta.yaml file will be checked by the `conda-forge-linting service`. From 8d29cc120872a39bff8d085ebad4d8f4a3dbed0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor?= Date: Mon, 5 Jul 2021 23:59:42 -0300 Subject: [PATCH 39/56] Putting examples on Folium module --- pymove/visualization/folium.py | 379 ++++++++++++++++++++++++++++++++- 1 file changed, 378 insertions(+), 1 deletion(-) diff --git a/pymove/visualization/folium.py b/pymove/visualization/folium.py index 2eb84256..70837e2e 100644 --- a/pymove/visualization/folium.py +++ b/pymove/visualization/folium.py @@ -93,6 +93,18 @@ def save_map( ------- Map folium map or None + + Examples + -------- + >>> from pymove.visualization.folium import save_map + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> save_map(df, filename = '/content/test.map') """ map_ = folium.Map(tiles=tiles) map_.fit_bounds( @@ -148,6 +160,18 @@ def create_base_map( ------- Map a folium map + + Examples + -------- + >>> from pymove.visualization.folium import create_base_map + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> create_base_map(move_df) """ if lat_origin is None and lon_origin is None: lat_origin = move_data[LATITUDE].median() @@ -205,6 +229,18 @@ def heatmap( ------- Map folium Map + + Examples + -------- + >>> from pymove.visualization.folium import heatmap + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 
39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> heatmap(move_df) """ if base_map is None: base_map = create_base_map( @@ -284,6 +320,18 @@ def heatmap_with_time( ------- Map folium Map + + Examples + -------- + >>> from pymove.visualization.folium import heatmap_with_time + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> heatmap_with_time(move_df) """ if base_map is None: base_map = create_base_map( @@ -369,6 +417,18 @@ def cluster( ------- Map folium Map + + Examples + -------- + >>> from pymove.visualization.folium import cluster + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> cluster(move_df) """ if base_map is None: base_map = create_base_map( @@ -447,6 +507,18 @@ def faster_cluster( ------- Map folium Map + + Examples + -------- + >>> from pymove.visualization.folium import faster_cluster + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> faster_cluster(move_df) """ if base_map is None: base_map = create_base_map( @@ -520,6 +592,18 @@ def plot_markers( ------- Map folium Map + + Examples + -------- + >>> from pymove.visualization.folium import plot_markers + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 
2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> plot_markers(move_df) """ if base_map is None: base_map = create_base_map( @@ -596,6 +680,26 @@ def _filter_and_generate_colors( list of tuples list containing a combination of id and color + Examples + -------- + >>> from pymove.visualization.folium import _filter_and_generate_colors + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> _filter_and_generate_colors(move_df) + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + ... ... ... ... .. 
+ [5000 rows x 4 columns], [(1, '#e41a1c')]) + """ if n_rows is None: n_rows = move_data.shape[0] @@ -664,6 +768,22 @@ def _filter_generated_feature( dataframe filtered dataframe + Examples + -------- + >>> from pymove.visualization.folium import _filter_generated_feature + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> print(_filter_generated_feature(df8, feature='lat', values=[39.984198])) + lat lon datetime id + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + >>> print(_filter_generated_feature(df8, feature='lon', values=[116.319236])) + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 """ if len(values) == 1: mv_df = move_data[move_data[feature] == values[0]] @@ -699,6 +819,19 @@ def _add_begin_end_markers_to_folium_map( Color of the markers, by default None id: int, optional Id of the trajectory, by default None + + Examples + -------- + >>> from pymove.visualization.folium import _add_begin_end_markers_to_folium_map + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> map = create_base_map(move_df) + >>> _add_begin_end_markers_to_folium_map(move_df, map) """ points = folium.map.FeatureGroup( 'The start and end points of trajectory {}'.format(_id or '') @@ -751,6 +884,17 @@ def _add_trajectories_to_folium_map( filename : str, optional Represents the file name of new file .html, by default 'map.html'. 
+ Examples + -------- + >>> from pymove.visualization.folium import _add_trajectories_to_folium_map + >>> move_df + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + ... ... ... ... ... + 113605 39.988118 116.326672 2008-10-25 14:39:19 5 + 113606 39.987965 116.326675 2008-10-25 14:39:24 5 + >>> _add_trajectories_to_folium_map(move_data = df10, base_map = map1,items=[(1, 'red'), [5, 'green']]) """ for _id, color in items: mv = move_data[move_data[TRAJ_ID] == _id] @@ -826,6 +970,17 @@ def plot_trajectories_with_folium( Map a folium map with visualization. + Examples + -------- + >>> from pymove.visualization.folium import plot_trajectories_with_folium + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> plot_trajectories_with_folium(move_df) """ if base_map is None: base_map = create_base_map( @@ -907,6 +1062,17 @@ def plot_trajectory_by_id_folium( IndexError If there is no user with the id passed + Examples + -------- + >>> from pymove.visualization.folium import plot_trajectory_by_id_folium + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> plot_trajectory_by_id_folium(move_df, id_=1) """ if base_map is None: base_map = create_base_map( @@ -994,6 +1160,26 @@ def plot_trajectory_by_period( IndexError If there is no user with the id passed + Examples + -------- + >>> from pymove.visualization.folium import plot_trajectory_by_period + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 
2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> plot_trajectory_by_period(move_df, period='Early morning') + >>> move_df.head() + lat lon datetime id period + 0 39.984094 116.319236 2008-10-23 05:53:05 1 Early morning + 1 39.984198 116.319322 2008-10-23 05:53:06 1 Early morning + 2 39.984224 116.319402 2008-10-23 05:53:11 1 Early morning + 3 39.984211 116.319389 2008-10-23 05:53:16 1 Early morning + 4 39.984217 116.319422 2008-10-23 05:53:21 1 Early morning + >>> move_df['period'].unique() + array(['Early morning', 'Morning', 'Afternoon', 'Evening'], dtype=object) """ if base_map is None: base_map = create_base_map( @@ -1085,6 +1271,26 @@ def plot_trajectory_by_day_week( IndexError If there is no user with the id passed + Examples + -------- + >>> from pymove.visualization.folium import plot_trajectory_by_day_week + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> plot_trajectory_by_day_week(move_df, day_week='Friday') + >>> move_df.head() + lat lon datetime id day + 0 39.984094 116.319236 2008-10-23 05:53:05 1 Thursday + 1 39.984198 116.319322 2008-10-23 05:53:06 1 Thursday + 2 39.984224 116.319402 2008-10-23 05:53:11 1 Thursday + 3 39.984211 116.319389 2008-10-23 05:53:16 1 Thursday + 4 39.984217 116.319422 2008-10-23 05:53:21 1 Thursday + >>> move_df.day.unique() + array(['Thursday', 'Friday', 'Saturday'], dtype=object) """ if base_map is None: base_map = create_base_map( @@ -1179,9 +1385,30 @@ def plot_trajectory_by_date( IndexError If there is no user with the id passed + Examples + -------- + >>> from pymove.visualization.folium import plot_trajectory_by_date + >>> move_df.head() + lat lon datetime id + 0 
39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> plot_trajectory_by_date(move_df, start_date = '2008-10-23 05:53:05',end_date = '2008-10-23 23:43:56') + >>> move_df.head() + lat lon datetime id date + 0 39.984094 116.319236 2008-10-23 05:53:05 1 2008-10-23 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 2008-10-23 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 2008-10-23 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 2008-10-23 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 2008-10-23 + >>> move_df.date.unique() + array([datetime.date(2008, 10, 23), datetime.date(2008, 10, 24), + datetime.date(2008, 10, 25)], dtype=object) """ if base_map is None: - base_map = create_base_map( + bfrom pymove.visualization.folium import plot_trajectory_by_hourase_map = create_base_map( move_data, lat_origin, lon_origin, @@ -1279,6 +1506,25 @@ def plot_trajectory_by_hour( IndexError If there is no user with the id passed + Examples + -------- + >>> from pymove.visualization.folium import plot_trajectory_by_hour + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> plot_trajectory_by_hour(move_df, start_hour=5,end_hour = 6) + lat lon datetime id hour + 0 39.984094 116.319236 2008-10-23 05:53:05 1 5 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 5 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 5 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 5 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 5 + >>> move_df['hour'].unique() + array([ 5, 6, 10, 11, 12, 23, 0, 1, 2, 3, 4]) """ if base_map is None: base_map = create_base_map( @@ -1370,6 +1616,30 @@ def plot_stops( IndexError If 
there is no user with the id passed + Examples + -------- + >>> from pymove.visualization.folium import plot_stops + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> plot_stops(move_df) + >>> move_df.head() + lat lon datetime id\ + dist_to_prev dist_to_next dist_prev_to_next situation + 0 39.984094 116.319236 2008-10-23 05:53:05 1\ + NaN 13.690153 NaN nan + 1 39.984198 116.319322 2008-10-23 05:53:06 1\ + 13.690153 7.403788 20.223428 move + 2 39.984224 116.319402 2008-10-23 05:53:11 1\ + 7.403788 1.821083 5.888579 move + 3 39.984211 116.319389 2008-10-23 05:53:16 1\ + 1.821083 2.889671 1.873356 move + 4 39.984217 116.319422 2008-10-23 05:53:21 1\ + 2.889671 66.555997 68.727260 move """ if base_map is None: base_map = create_base_map( @@ -1443,6 +1713,10 @@ def plot_bbox( Map folium map with bounding box + Examples + -------- + >>> from pymove.visualization.folium import plot_bbox + >>> plot_bbox((39.984094,116.319236,39.997535,116.196345)) """ if base_map is None: base_map = folium.Map(tiles=tiles) @@ -1479,6 +1753,20 @@ def _format_tags(line: Union[List, Dict], slice_: List) -> Text: ------- str: formatted html tag + Examples + -------- + >>> from pymove.visualization.folium import _format_tags, plot_points_folium + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> _format_tags(line={'lat': 39.984094, 'lon': 116.319236, 'datetime': '2008-10-23 05:53:05', 'id': 1} + , slice_=['lat', 'lon', 'datetime', 'id']) + lat: 39.984094
    lon: 116.319236
    datetime: 2008-10-23 05:53:05
    id: 1 + >>> plot_points_folium(move_df.head()) """ map_formated_tags = map(lambda tag: '{}: {}'.format(tag, line[tag]), slice_) @@ -1511,6 +1799,20 @@ def _circle_maker( radius: float. radius size. map_: Folium map. + + Examples + -------- + >>> from pymove.visualization.folium import _circle_maker + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> row = move_df.iloc[0]; iter_tuple = (0, row); user_lat = 'lat'; user_lon = 'lon' + >>> slice_tags = row.keys(); user_point = 'pink'; radius = 10; map_ = create_base_map(move_df) + >>> _circle_maker(iter_tuple, user_lat, user_lon, slice_tags, user_point, radius, map_) """ _, line = iter_tuple @@ -1572,6 +1874,18 @@ def plot_points_folium( ------- Map A folium map + + Examples + -------- + >>> from pymove.visualization.folium import plot_points_folium + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> plot_points_folium(move_df) """ if slice_tags is None: slice_tags = list(move_data.columns) @@ -1645,6 +1959,18 @@ def plot_poi_folium( ------- folium.folium.Map. Represents a folium map with visualization. 
+ + Examples + -------- + >>> from pymove.visualization.folium import plot_poi_folium + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> plot_poi_folium(move_df) """ return plot_points_folium( move_data, @@ -1699,6 +2025,18 @@ def plot_event_folium( Returns ------- A folium map. + + Examples + -------- + >>> from pymove.visualization.folium import plot_event_folium + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> plot_event_folium(move_df) """ return plot_points_folium( move_data, @@ -1783,6 +2121,9 @@ def show_trajs_with_event( ------ ValueError If feature generation fails + + Examples + -------- """ # building structure for deltas delta_event = pd.to_timedelta(window_time_event, unit='s') @@ -1970,6 +2311,9 @@ def show_traj_id_with_event( ------- Map A list of folium maps. + + Examples + -------- """ df_id = move_data[move_data[user_id] == subject_id] @@ -2019,6 +2363,27 @@ def _create_geojson_features_line( ------- list GeoJSON features. + + Examples + -------- + >>> from pymove.visualization.folium import _create_geojson_features_line + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> _create_geojson_features_line(move_df) + [{'geometry': {'coordinates': [[116.319236, 39.984094], + [116.319322, 39.984198]], + 'type': 'LineString'}, + 'properties': {'popup': 'lat: 39.984094
    lon: 116.319236
    datetime: 2008-10-23 05:53:05
    id: 1', + 'style': {'color': 'red', + 'icon': 'circle', + 'iconstyle': {'color': 'red', 'weight': 4}}, + 'times': ['2008-10-23T05:53:05', '2008-10-23T05:53:06']}, + 'type': 'Feature'},...] """ features = [] @@ -2096,6 +2461,18 @@ def plot_traj_timestamp_geo_json( ------- Map A folium map. + + Examples + -------- + >>> from pymove.visualization.folium import plot_traj_timestamp_geo_json + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 1 + 4 39.984217 116.319422 2008-10-23 05:53:21 1 + >>> plot_traj_timestamp_geo_json(move_df) """ features = _create_geojson_features_line( move_data, From 2b171e37fe1802979b03862e342d3bd6398932de Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 6 Jul 2021 16:05:00 -0300 Subject: [PATCH 40/56] fix some linting issues --- pymove/tests/test_visualization_folium.py | 67 ---- pymove/visualization/folium.py | 435 +++++----------------- 2 files changed, 94 insertions(+), 408 deletions(-) diff --git a/pymove/tests/test_visualization_folium.py b/pymove/tests/test_visualization_folium.py index 588c71e5..300c2f33 100644 --- a/pymove/tests/test_visualization_folium.py +++ b/pymove/tests/test_visualization_folium.py @@ -1076,73 +1076,6 @@ def test_plot_event_folium(tmpdir): ) -def test_show_trajs_with_event(): - - move_df = _default_move_df() - - df_event = move_df.iloc[0:3, :] - - list_ = folium.show_trajs_with_event( - move_data=move_df, - window_time_subject=4, - df_event=df_event, - window_time_event=4, - radius=150, - event_lat=LATITUDE, - event_lon=LONGITUDE, - event_datetime=DATETIME, - user_lat=LATITUDE, - user_lon=LONGITUDE, - user_datetime=DATETIME, - event_id='id', - event_point=EVENT_POINT, - user_id='id', - user_point=USER_POINT, - line_color=LINE_COLOR, - slice_event_show=None, - slice_subject_show=None, - ) - - assert len(list_) == 3, 'list with wrong 
number of elements' - for i in list_: - base_map = i[0] - assert(base_map.control_scale is True) - - -def test_show_traj_id_with_event(): - - move_df = _default_move_df() - - df_event = move_df.iloc[0:3, :] - - list_ = folium.show_traj_id_with_event( - move_data=move_df, - window_time_subject=4, - subject_id=1, - df_event=df_event, - window_time_event=4, - radius=150, - event_lat=LATITUDE, - event_lon=LONGITUDE, - event_datetime=DATETIME, - user_lat=LATITUDE, - user_lon=LONGITUDE, - user_datetime=DATETIME, - event_id='id', - event_point=EVENT_POINT, - user_id='id', - user_point=USER_POINT, - line_color=LINE_COLOR, - slice_event_show=None, - slice_subject_show=None, - ) - - assert type(list_) == tuple, 'Wrong type' - assert len(list_) == 2, 'list with wrong number of elements' - assert len(list_[1]) == 2 - assert list_[0].control_scale is True - - def test_create_geojson_features_line(): move_df = _default_move_df() diff --git a/pymove/visualization/folium.py b/pymove/visualization/folium.py index 70837e2e..2a1fc102 100644 --- a/pymove/visualization/folium.py +++ b/pymove/visualization/folium.py @@ -30,14 +30,11 @@ import folium import numpy as np -import pandas as pd from folium import Map, plugins from folium.plugins import FastMarkerCluster, HeatMap, HeatMapWithTime, MarkerCluster from pandas import DataFrame from pymove import PandasMoveDataFrame -from pymove.preprocessing import filters -from pymove.utils import distances from pymove.utils.constants import ( COUNT, DATE, @@ -104,7 +101,7 @@ def save_map( 2 39.984224 116.319402 2008-10-23 05:53:11 1 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> save_map(df, filename = '/content/test.map') + >>> save_map(df, filename='test.map') """ map_ = folium.Map(tiles=tiles) map_.fit_bounds( @@ -688,18 +685,18 @@ def _filter_and_generate_colors( 0 39.984094 116.319236 2008-10-23 05:53:05 1 1 39.984198 116.319322 2008-10-23 05:53:06 1 2 39.984224 116.319402 2008-10-23 05:53:11 
1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> _filter_and_generate_colors(move_df) + 3 39.984211 116.319389 2008-10-23 05:53:16 2 + 4 39.984217 116.319422 2008-10-23 05:53:21 2 + >>> df, colors = _filter_and_generate_colors(move_df) + >>> df lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 1 39.984198 116.319322 2008-10-23 05:53:06 1 2 39.984224 116.319402 2008-10-23 05:53:11 1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 - ... ... ... ... .. - [5000 rows x 4 columns], [(1, '#e41a1c')]) - + 3 39.984211 116.319389 2008-10-23 05:53:16 2 + 4 39.984217 116.319422 2008-10-23 05:53:21 2 + >>> colors + [(1, '#e41a1c'), (2, '#377eb8')] """ if n_rows is None: n_rows = move_data.shape[0] @@ -778,10 +775,10 @@ def _filter_generated_feature( 2 39.984224 116.319402 2008-10-23 05:53:11 1 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> print(_filter_generated_feature(df8, feature='lat', values=[39.984198])) + >>> _filter_generated_feature(move_df, feature='lat', values=[39.984198]) lat lon datetime id 1 39.984198 116.319322 2008-10-23 05:53:06 1 - >>> print(_filter_generated_feature(df8, feature='lon', values=[116.319236])) + >>> _filter_generated_feature(move_df, feature='lon', values=[116.319236]) lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 """ @@ -891,10 +888,13 @@ def _add_trajectories_to_folium_map( lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 1 39.984198 116.319322 2008-10-23 05:53:06 1 - ... ... ... ... ... 
- 113605 39.988118 116.326672 2008-10-25 14:39:19 5 - 113606 39.987965 116.326675 2008-10-25 14:39:24 5 - >>> _add_trajectories_to_folium_map(move_data = df10, base_map = map1,items=[(1, 'red'), [5, 'green']]) + 3 39.988118 116.326672 2008-10-25 14:39:19 5 + 4 39.987965 116.326675 2008-10-25 14:39:24 5 + >>> _add_trajectories_to_folium_map( + >>> move_data=move_df, + >>> base_map=map1, + >>> items=[(1, 'red'), [5, 'green']] + >>> ) """ for _id, color in items: mv = move_data[move_data[TRAJ_ID] == _id] @@ -1070,8 +1070,8 @@ def plot_trajectory_by_id_folium( 0 39.984094 116.319236 2008-10-23 05:53:05 1 1 39.984198 116.319322 2008-10-23 05:53:06 1 2 39.984224 116.319402 2008-10-23 05:53:11 1 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 2 + 4 39.984217 116.319422 2008-10-23 05:53:21 2 >>> plot_trajectory_by_id_folium(move_df, id_=1) """ if base_map is None: @@ -1178,8 +1178,6 @@ def plot_trajectory_by_period( 2 39.984224 116.319402 2008-10-23 05:53:11 1 Early morning 3 39.984211 116.319389 2008-10-23 05:53:16 1 Early morning 4 39.984217 116.319422 2008-10-23 05:53:21 1 Early morning - >>> move_df['period'].unique() - array(['Early morning', 'Morning', 'Afternoon', 'Evening'], dtype=object) """ if base_map is None: base_map = create_base_map( @@ -1289,8 +1287,6 @@ def plot_trajectory_by_day_week( 2 39.984224 116.319402 2008-10-23 05:53:11 1 Thursday 3 39.984211 116.319389 2008-10-23 05:53:16 1 Thursday 4 39.984217 116.319422 2008-10-23 05:53:21 1 Thursday - >>> move_df.day.unique() - array(['Thursday', 'Friday', 'Saturday'], dtype=object) """ if base_map is None: base_map = create_base_map( @@ -1395,7 +1391,11 @@ def plot_trajectory_by_date( 2 39.984224 116.319402 2008-10-23 05:53:11 1 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> plot_trajectory_by_date(move_df, start_date = '2008-10-23 05:53:05',end_date = '2008-10-23 
23:43:56') + >>> plot_trajectory_by_date( + >>> move_df, + >>> start_date='2008-10-23 05:53:05', + >>> end_date='2008-10-23 23:43:56' + >>> ) >>> move_df.head() lat lon datetime id date 0 39.984094 116.319236 2008-10-23 05:53:05 1 2008-10-23 @@ -1403,12 +1403,9 @@ def plot_trajectory_by_date( 2 39.984224 116.319402 2008-10-23 05:53:11 1 2008-10-23 3 39.984211 116.319389 2008-10-23 05:53:16 1 2008-10-23 4 39.984217 116.319422 2008-10-23 05:53:21 1 2008-10-23 - >>> move_df.date.unique() - array([datetime.date(2008, 10, 23), datetime.date(2008, 10, 24), - datetime.date(2008, 10, 25)], dtype=object) """ if base_map is None: - bfrom pymove.visualization.folium import plot_trajectory_by_hourase_map = create_base_map( + base_map = create_base_map( move_data, lat_origin, lon_origin, @@ -1516,15 +1513,13 @@ def plot_trajectory_by_hour( 2 39.984224 116.319402 2008-10-23 05:53:11 1 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> plot_trajectory_by_hour(move_df, start_hour=5,end_hour = 6) + >>> plot_trajectory_by_hour(move_df, start_hour=4,end_hour=6) lat lon datetime id hour 0 39.984094 116.319236 2008-10-23 05:53:05 1 5 1 39.984198 116.319322 2008-10-23 05:53:06 1 5 2 39.984224 116.319402 2008-10-23 05:53:11 1 5 3 39.984211 116.319389 2008-10-23 05:53:16 1 5 4 39.984217 116.319422 2008-10-23 05:53:21 1 5 - >>> move_df['hour'].unique() - array([ 5, 6, 10, 11, 12, 23, 0, 1, 2, 3, 4]) """ if base_map is None: base_map = create_base_map( @@ -1628,17 +1623,17 @@ def plot_stops( 4 39.984217 116.319422 2008-10-23 05:53:21 1 >>> plot_stops(move_df) >>> move_df.head() - lat lon datetime id\ + lat lon datetime id \ dist_to_prev dist_to_next dist_prev_to_next situation - 0 39.984094 116.319236 2008-10-23 05:53:05 1\ + 0 39.984094 116.319236 2008-10-23 05:53:05 1 \ NaN 13.690153 NaN nan - 1 39.984198 116.319322 2008-10-23 05:53:06 1\ + 1 39.984198 116.319322 2008-10-23 05:53:06 1 \ 13.690153 7.403788 20.223428 move - 2 39.984224 116.319402 
2008-10-23 05:53:11 1\ + 2 39.984224 116.319402 2008-10-23 05:53:11 1 \ 7.403788 1.821083 5.888579 move - 3 39.984211 116.319389 2008-10-23 05:53:16 1\ + 3 39.984211 116.319389 2008-10-23 05:53:16 1 \ 1.821083 2.889671 1.873356 move - 4 39.984217 116.319422 2008-10-23 05:53:21 1\ + 4 39.984217 116.319422 2008-10-23 05:53:21 1 \ 2.889671 66.555997 68.727260 move """ if base_map is None: @@ -1763,10 +1758,16 @@ def _format_tags(line: Union[List, Dict], slice_: List) -> Text: 2 39.984224 116.319402 2008-10-23 05:53:11 1 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> _format_tags(line={'lat': 39.984094, 'lon': 116.319236, 'datetime': '2008-10-23 05:53:05', 'id': 1} - , slice_=['lat', 'lon', 'datetime', 'id']) + >>> _format_tags( + >>> line={ + >>> 'lat': 39.984094, + >>> 'lon': 116.319236, + >>> 'datetime': '2008-10-23 05:53:05', + >>> 'id': 1 + >>> }, + >>> slice_=['lat', 'lon', 'datetime', 'id'] + >>> ) lat: 39.984094
    lon: 116.319236
    datetime: 2008-10-23 05:53:05
    id: 1 - >>> plot_points_folium(move_df.head()) """ map_formated_tags = map(lambda tag: '{}: {}'.format(tag, line[tag]), slice_) @@ -1810,9 +1811,18 @@ def _circle_maker( 2 39.984224 116.319402 2008-10-23 05:53:11 1 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> row = move_df.iloc[0]; iter_tuple = (0, row); user_lat = 'lat'; user_lon = 'lon' - >>> slice_tags = row.keys(); user_point = 'pink'; radius = 10; map_ = create_base_map(move_df) - >>> _circle_maker(iter_tuple, user_lat, user_lon, slice_tags, user_point, radius, map_) + >>> row = move_df.iloc[0] + >>> iter_tuple = (0, row) + >>> user_lat = 'lat' + >>> user_lon = 'lon' + >>> slice_tags = row.keys() + >>> user_point = 'pink' + >>> radius = 10 + >>> map_ = create_base_map(move_df) + >>> _circle_maker( + >>> iter_tuple, user_lat, user_lon, + >>> slice_tags, user_point, radius, map_ + >>> ) """ _, line = iter_tuple @@ -2052,293 +2062,6 @@ def plot_event_folium( ) -def show_trajs_with_event( - move_data: DataFrame, - window_time_subject: float, - df_event: DataFrame, - window_time_event: float, - radius: float, - event_lat: Text = LATITUDE, - event_lon: Text = LONGITUDE, - event_datetime: Text = DATETIME, - user_lat: Text = LATITUDE, - user_lon: Text = LONGITUDE, - user_datetime: Text = DATETIME, - event_id: Text = EVENT_ID, - event_point: Text = EVENT_POINT, - user_id: Text = UID, - user_point: Text = USER_POINT, - line_color: Text = LINE_COLOR, - slice_event_show: Optional[List] = None, - slice_subject_show: Optional[List] = None, -) -> List[Map]: - """ - Plot a trajectory, including your user_points lat lon and your tags. - - Parameters - ---------- - move_data: DataFrame. - Trajectory input data. - window_time_subject: float. - The subject time window. - window_time_event: float. - The event time window. - radius: float. - The radius to use. - event_lat: str, optional - Event latitude column name, by default LATITUDE. 
- event_lon: str, optional - Event longitude column name, by default LONGITUDE. - event_datetime: str, optional - Event datetime column name, by default DATETIME. - user_lat: str, optional - User latitude column name, by default LATITUDE. - user_lon: str, optional - User longitude column name, by default LONGITUDE. - user_datetime: str, optional - User datetime column name, by default DATETIME. - event_id_: str, optional - Event id column name, by default TRAJ_ID. - event_point: str, optional - Event color, by default EVENT_POI. - user_id: str, optional - User id column name, by default TRAJ_ID. - user_point: str, optional - User point color, by default USER_POINT. - line_color: str, optional - Line color, by default 'blue'. - slice_event_show: list, optional - by default None. - slice_subject_show: list, optional - by default None. - - Returns - ------- - list of Map - A list of folium maps. - - Raises - ------ - ValueError - If feature generation fails - - Examples - -------- - """ - # building structure for deltas - delta_event = pd.to_timedelta(window_time_event, unit='s') - delta_user = pd.to_timedelta(window_time_subject, unit='s') - - # length of df_user - len_df_user = move_data.shape[0] - - # building structure for lat and lon array - lat_arr = np.zeros(len_df_user) - lon_arr = np.zeros(len_df_user) - - # folium map list - folium_maps = [] - - # for each event in df_event - for _, line in df_event.iterrows(): - - e_lat = line[event_lat] - e_lon = line[event_lon] - e_datetime = line[event_datetime] - e_id = line[event_id] - - # building time window for event search - start_time = pd.to_datetime(e_datetime - delta_event) - end_time = pd.to_datetime(e_datetime + delta_event) - - # filtering df_ for time window - df_filtered = filters.by_datetime( - move_data, - start_datetime=start_time, - end_datetime=end_time - ) - - if df_filtered is None: - raise ValueError('Filter datetime failed!') - - # length of df_temp - len_df_temp = df_filtered.shape[0] - - # using 
the util part of the array for haversine function - lat_arr[:len_df_temp] = e_lat - lon_arr[:len_df_temp] = e_lon - - # building distances to event column - df_filtered['distances'] = distances.haversine( - lat_arr[:len_df_temp], - lon_arr[:len_df_temp], - df_filtered[user_lat].values, - df_filtered[user_lon].values - ) - - # building nearby column - df_filtered['nearby'] = df_filtered['distances'].map(lambda x: (x <= radius)) - - # if any data for df_ in event time window is True - if df_filtered['nearby'].any(): - - # building the df for the first user_points of user in nearby event - df_begin = df_filtered[df_filtered['nearby']].sort_values( - user_datetime - ) - - move_data = df_event[df_event[event_id] == e_id] - - base_map = plot_event_folium( - move_data, - event_lat=event_lat, - event_lon=event_lon, - event_point=event_point, - slice_tags=slice_event_show - ) - - # keep only the first user_point nearby to event for each user - df_begin.drop_duplicates( - subset=[user_id, 'nearby'], - inplace=True - ) - # for each user nearby to event - users = [] - - for time_user, id_user in zip( - df_begin[user_datetime], - df_begin[user_id] - ): - # making the time window for user - start_time = pd.to_datetime(time_user - delta_user) - end_time = pd.to_datetime(time_user + delta_user) - - # building the df for one id - df_id = move_data[move_data[user_id] == id_user] - - # filtering df_id for time window - df_temp = filters.by_datetime( - df_id, - start_datetime=start_time, - end_datetime=end_time - ) - - users.append(df_temp) - # add to folium map created - base_map = plot_trajectories_with_folium( - df_temp, - color=[line_color], - base_map=base_map - ) - base_map = plot_points_folium( - df_temp, - user_lat=user_lat, - user_lon=user_lon, - user_point=user_point, - base_map=base_map, - slice_tags=slice_subject_show - ) - # add to folium maps list: (id event, folium map, quantity of user in map, df) - folium_maps.append((base_map, pd.concat(users))) - - return 
folium_maps - - -def show_traj_id_with_event( - move_data: DataFrame, - window_time_subject: float, - df_event: DataFrame, - window_time_event: float, - radius: float, - subject_id: int, - event_lat: Text = LATITUDE, - event_lon: Text = LONGITUDE, - event_datetime: Text = DATETIME, - user_lat: Text = LATITUDE, - user_lon: Text = LONGITUDE, - user_datetime: Text = DATETIME, - event_id: Text = EVENT_ID, - event_point: Text = EVENT_POINT, - user_id: Text = UID, - user_point: Text = USER_POINT, - line_color: Text = LINE_COLOR, - slice_event_show: Optional[List] = None, - slice_subject_show: Optional[List] = None, -) -> Map: - """ - Plot a trajectory, including your user_points lat lon and your tags. - - Parameters - ---------- - move_data: DataFrame. - Trajectory input data. - window_time_subject: float. - The subject time window. - window_time_event: float. - The event time window. - radius: float. - The radius to use. - subject_id: int - Id of the trajectory - event_lat: str, optional - Event latitude column name, by default LATITUDE. - event_lon: str, optional - Event longitude column name, by default LONGITUDE. - event_datetime: str, optional - Event datetime column name, by default DATETIME. - user_lat: str, optional - User latitude column name, by default LATITUDE. - user_lon: str, optional - User longitude column name, by default LONGITUDE. - user_datetime: str, optional - User datetime column name, by default DATETIME. - event_id_: str, optional - Event id column name, by default TRAJ_ID. - event_point: str, optional - Event color, by default EVENT_POINT. - user_id: str, optional - User id column name, by default TRAJ_ID. - user_point: str, optional - User point color, by default USER_POINT. - line_color: str, optional - Line color, by default 'blue'. - slice_event_show: list, optional - by default None. - slice_subject_show: list, optional - by default None. - - Returns - ------- - Map - A list of folium maps. 
- - Examples - -------- - """ - df_id = move_data[move_data[user_id] == subject_id] - - return show_trajs_with_event( - df_id, - window_time_subject, - df_event, - window_time_event, - radius, - event_lat=event_lat, - event_lon=event_lon, - event_datetime=event_datetime, - user_lat=user_lat, - user_lon=user_lon, - user_datetime=user_datetime, - event_id=event_id, - event_point=event_point, - user_id=user_id, - user_point=user_point, - line_color=line_color, - slice_event_show=slice_event_show, - slice_subject_show=slice_subject_show - )[0] - - def _create_geojson_features_line( move_data: DataFrame, label_lat: Text = LATITUDE, @@ -2375,15 +2098,41 @@ def _create_geojson_features_line( 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 >>> _create_geojson_features_line(move_df) - [{'geometry': {'coordinates': [[116.319236, 39.984094], - [116.319322, 39.984198]], - 'type': 'Linestr'}, - 'properties': {'popup': 'lat: 39.984094
    lon: 116.319236
    datetime: 2008-10-23 05:53:05
    id: 1', - 'style': {'color': 'red', - 'icon': 'circle', - 'iconstyle': {'color': 'red', 'weight': 4}}, - 'times': ['2008-10-23T05:53:05', '2008-10-23T05:53:06']}, - 'type': 'Feature'},...] + [ + { + "type":"Feature", + "geometry":{ + "type":"Linestr", + "coordinates":[ + [ + 116.319236, + 39.984094 + ], + [ + 116.319322, + 39.984198 + ] + ] + }, + "properties":{ + "times":[ + "2008-10-23T05:53:05", + "2008-10-23T05:53:06" + ], + "popup":"lat: 39.984094
    lon: 116.319236
    \ + datetime: 2008-10-23 05:53:05
    id: 1", + "style":{ + "color":"red", + "icon":"circle", + "iconstyle":{ + "color":"red", + "weight":4 + } + } + } + }, + ... + ] """ features = [] @@ -2391,7 +2140,11 @@ def _create_geojson_features_line( _, last = next(row_iterator) columns = move_data.columns - for i, row in progress_bar(row_iterator, total=move_data.shape[0] - 1) : + for i, row in progress_bar( + row_iterator, + total=move_data.shape[0], + desc='Generating GeoJSon' + ): last_time = last[label_datetime].strftime('%Y-%m-%dT%H:%M:%S') next_time = row[label_datetime].strftime('%Y-%m-%dT%H:%M:%S') From 4b6a5a92d9123ea3ab1040ad23ce8741c651232f Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 6 Jul 2021 16:05:56 -0300 Subject: [PATCH 41/56] removed missing function from module doc --- pymove/visualization/folium.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pymove/visualization/folium.py b/pymove/visualization/folium.py index 2a1fc102..2755e34e 100644 --- a/pymove/visualization/folium.py +++ b/pymove/visualization/folium.py @@ -19,8 +19,6 @@ plot_points_folium, plot_poi_folium, plot_event_folium, -show_trajs_with_event, -show_traj_id_with_event, plot_traj_timestamp_geo_json """ From aca590d721095a4c488f964a4024f8612aefcb5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor?= Date: Mon, 12 Jul 2021 11:09:38 -0300 Subject: [PATCH 42/56] putting examples on matplotlib module --- pymove/visualization/matplotlib.py | 76 +++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/pymove/visualization/matplotlib.py b/pymove/visualization/matplotlib.py index a2105cdb..53e5d9b8 100644 --- a/pymove/visualization/matplotlib.py +++ b/pymove/visualization/matplotlib.py @@ -79,6 +79,29 @@ def show_object_id_by_date( ---------- https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.plot.html + Examples + -------- + >>> from pymove.visualization.matplotlib import show_object_id_by_date + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 
2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 2 + 4 39.984217 116.319422 2008-10-23 05:53:21 2 + >>> show_object_id_by_date(move_df) + lat lon datetime id\ + date hour period day + 0 39.984094 116.319236 2008-10-23 05:53:05 1\ + 2008-10-23 5 Early morning Thursday + 1 39.984198 116.319322 2008-10-23 05:53:06 1\ + 2008-10-23 5 Early morning Thursday + 2 39.984224 116.319402 2008-10-23 05:53:11 1\ + 2008-10-23 5 Early morning Thursday + 3 39.984211 116.319389 2008-10-23 05:53:16 1\ + 2008-10-23 5 Early morning Thursday + 4 39.984217 116.319422 2008-10-23 05:53:21 1\ + 2008-10-23 5 Early morning Thursday """ if kind is None: kind = ['bar', 'bar', 'line', 'line'] @@ -151,6 +174,18 @@ def plot_trajectories( ------- figure The generated picture or None + + Examples + -------- + >>> from pymove.visualization.matplotlib import plot_trajectories + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 2 + 4 39.984217 116.319422 2008-10-23 05:53:21 2 + >>> plot_trajectories(move_df) """ fig = plt.figure(figsize=figsize) @@ -225,6 +260,18 @@ def plot_traj_by_id( IndexError If there is no trajectory with the tid passed + Examples + -------- + >>> from pymove.visualization.matplotlib import plot_traj_by_id + >>> move_df + lat lon datetime id tid + 0 39.984094 116.319236 2008-10-23 05:53:05 1 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 2 + 3 39.984211 116.319389 2008-10-23 05:53:16 2 2 + 4 39.984217 116.319422 2008-10-23 05:53:21 2 2 + >>> print(plot_traj_by_id(move_df_3, '1')) + >>> print(plot_traj_by_id(move_df_3, '2')) """ if label not in move_data: raise KeyError('%s feature not in dataframe' % label) @@ -306,6 +353,17 @@ def 
plot_all_features( AttributeError If there are no columns with the specified type + Examples + -------- + >>> from pymove.visualization.matplotlib import plot_all_features + >>> move_df.head() + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 2 + 4 39.984217 116.319422 2008-10-23 05:53:21 2 + >>> plot_all_features(move_df) """ col_dtype = move_data.select_dtypes(include=[dtype]).columns tam = col_dtype.size @@ -341,6 +399,11 @@ def plot_coords(ax: axes, ob: BaseGeometry, color: Text = 'r'): Example ------- + >>> from pymove.visualization.matplotlib import plot_coords + >>> import matplotlib.pyplot as plt + >>> coords = LineString([(1, 1), (1, 2), (2, 2), (2, 3)]) + >>> _, ax = plt.subplots(figsize=(21, 9)) + >>> plot_coords(ax, coords) """ x, y = ob.xy ax.plot(x, y, 'o', color=color, zorder=1) @@ -348,7 +411,7 @@ def plot_coords(ax: axes, ob: BaseGeometry, color: Text = 'r'): def plot_bounds(ax: axes, ob: Union[LineString, MultiLineString], color='b'): """ - Plot the limites of geometric object. + Plot the limits of geometric object. 
Parameters ---------- @@ -361,7 +424,11 @@ def plot_bounds(ax: axes, ob: Union[LineString, MultiLineString], color='b'): Example ------- - + >>> from pymove.visualization.matplotlib import plot_bounds + >>> import matplotlib.pyplot as plt + >>> bounds = LineString([(1, 1), (1, 2), (2, 2), (2, 3)]) + >>> _, ax = plt.subplots(figsize=(21, 9)) + >>> plot_bounds(ax, bounds) """ x, y = zip(*list((p.x, p.y) for p in ob.boundary)) ax.plot(x, y, '-', color=color, zorder=1) @@ -398,6 +465,11 @@ def plot_line( Example ------- + >>> from pymove.visualization.matplotlib import plot_line + >>> import matplotlib.pyplot as plt + >>> line = LineString([(1, 1), (1, 2), (2, 2), (2, 3)]) + >>> _, ax = plt.subplots(figsize=(21, 9)) + >>> plot_line(ax, line) """ x, y = ob.xy ax.plot( From 77ff702ad4368ca4e2b0804ee68077fd341ef38f Mon Sep 17 00:00:00 2001 From: flych3r Date: Mon, 12 Jul 2021 12:56:29 -0300 Subject: [PATCH 43/56] stop plot functions from changing dataframe --- pymove/tests/test_visualization_matplotlib.py | 17 ------- pymove/visualization/folium.py | 15 +++++++ pymove/visualization/matplotlib.py | 45 +++++++------------ 3 files changed, 30 insertions(+), 47 deletions(-) diff --git a/pymove/tests/test_visualization_matplotlib.py b/pymove/tests/test_visualization_matplotlib.py index c5075097..897ebded 100644 --- a/pymove/tests/test_visualization_matplotlib.py +++ b/pymove/tests/test_visualization_matplotlib.py @@ -50,7 +50,6 @@ def test_show_object_id_by_date(tmpdir): mpl.show_object_id_by_date( move_data=move_df, - create_features=False, name=filename_write_default ) @@ -64,22 +63,6 @@ def test_show_object_id_by_date(tmpdir): in_decorator=False ) - assert(HOUR not in move_df) - assert(DATE not in move_df) - assert(PERIOD not in move_df) - assert(DAY not in move_df) - - mpl.show_object_id_by_date( - move_data=move_df, - create_features=True, - name=filename_write_default - ) - - assert(DATE in move_df) - assert(HOUR in move_df) - assert(PERIOD in move_df) - assert(DAY in 
move_df) - def test_plot_traj_by_id(tmpdir): move_df = _default_move_df() diff --git a/pymove/visualization/folium.py b/pymove/visualization/folium.py index 2755e34e..8c503c4b 100644 --- a/pymove/visualization/folium.py +++ b/pymove/visualization/folium.py @@ -1186,6 +1186,7 @@ def plot_trajectory_by_period( default_zoom_start=zoom_start, ) + columns = move_data.columns if PERIOD not in move_data: move_data.generate_time_of_day_features() @@ -1194,6 +1195,8 @@ def plot_trajectory_by_period( _add_trajectories_to_folium_map( mv_df, items, base_map, legend, save_as_html, filename ) + to_drop = list(set(move_data.columns) - set(columns)) + move_data.drop(columns=to_drop, inplace=True) return base_map @@ -1295,6 +1298,7 @@ def plot_trajectory_by_day_week( default_zoom_start=zoom_start, ) + columns = move_data.columns if DAY not in move_data: move_data.generate_day_of_the_week_features() @@ -1303,6 +1307,8 @@ def plot_trajectory_by_day_week( _add_trajectories_to_folium_map( mv_df, items, base_map, legend, save_as_html, filename ) + to_drop = list(set(move_data.columns) - set(columns)) + move_data.drop(columns=to_drop, inplace=True) return base_map @@ -1417,6 +1423,7 @@ def plot_trajectory_by_date( if isinstance(end_date, str): end_date = str_to_datetime(end_date).date() + columns = move_data.columns if DATE not in move_data: move_data.generate_date_features() @@ -1425,6 +1432,8 @@ def plot_trajectory_by_date( _add_trajectories_to_folium_map( mv_df, items, base_map, legend, save_as_html, filename ) + to_drop = list(set(move_data.columns) - set(columns)) + move_data.drop(columns=to_drop, inplace=True) return base_map @@ -1528,6 +1537,7 @@ def plot_trajectory_by_hour( default_zoom_start=zoom_start, ) + columns = move_data.columns if HOUR not in move_data: move_data.generate_hour_features() @@ -1536,6 +1546,8 @@ def plot_trajectory_by_hour( _add_trajectories_to_folium_map( mv_df, items, base_map, legend, save_as_html, filename ) + to_drop = list(set(move_data.columns) - 
set(columns)) + move_data.drop(columns=to_drop, inplace=True) return base_map @@ -1643,6 +1655,7 @@ def plot_stops( default_zoom_start=zoom_start, ) + columns = move_data.columns if SITUATION not in move_data: move_data.generate_move_and_stop_by_radius(radius=radius) @@ -1669,6 +1682,8 @@ def plot_stops( if save_as_html: base_map.save(outfile=filename) + to_drop = list(set(move_data.columns) - set(columns)) + move_data.drop(columns=to_drop, inplace=True) return base_map diff --git a/pymove/visualization/matplotlib.py b/pymove/visualization/matplotlib.py index 53e5d9b8..8b526db7 100644 --- a/pymove/visualization/matplotlib.py +++ b/pymove/visualization/matplotlib.py @@ -7,7 +7,7 @@ plot_all_features plot_coords, plot_bounds, -plot_line, +plot_line """ @@ -37,7 +37,6 @@ def show_object_id_by_date( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - create_features: bool = True, kind: Optional[List] = None, figsize: Tuple[float, float] = (21, 9), return_fig: bool = True, @@ -56,9 +55,6 @@ def show_object_id_by_date( ---------- move_data : pymove.core.MoveDataFrameAbstract subclass. Input trajectory data. - create_features : bool, optional - Represents whether or not to delete features created for viewing, - by default True. 
kind: list, optional Determines the kinds of each plot, by default None figsize : tuple, optional @@ -90,24 +86,13 @@ def show_object_id_by_date( 3 39.984211 116.319389 2008-10-23 05:53:16 2 4 39.984217 116.319422 2008-10-23 05:53:21 2 >>> show_object_id_by_date(move_df) - lat lon datetime id\ - date hour period day - 0 39.984094 116.319236 2008-10-23 05:53:05 1\ - 2008-10-23 5 Early morning Thursday - 1 39.984198 116.319322 2008-10-23 05:53:06 1\ - 2008-10-23 5 Early morning Thursday - 2 39.984224 116.319402 2008-10-23 05:53:11 1\ - 2008-10-23 5 Early morning Thursday - 3 39.984211 116.319389 2008-10-23 05:53:16 1\ - 2008-10-23 5 Early morning Thursday - 4 39.984217 116.319422 2008-10-23 05:53:21 1\ - 2008-10-23 5 Early morning Thursday """ if kind is None: kind = ['bar', 'bar', 'line', 'line'] fig, ax = plt.subplots(2, 2, figsize=figsize) + columns = move_data.columns move_data.generate_date_features() move_data.generate_hour_features() move_data.generate_time_of_day_features() @@ -131,12 +116,12 @@ def show_object_id_by_date( subplots=True, kind=kind[3], grid=True, ax=ax[1][1], fontsize=12 ) - if not create_features: - move_data.drop(columns=[DATE, HOUR, PERIOD, DAY], inplace=True) - if save_fig: plt.savefig(fname=name) + to_drop = list(set(move_data.columns) - set(columns)) + move_data.drop(columns=to_drop, inplace=True) + if return_fig: return fig @@ -264,14 +249,14 @@ def plot_traj_by_id( -------- >>> from pymove.visualization.matplotlib import plot_traj_by_id >>> move_df - lat lon datetime id tid - 0 39.984094 116.319236 2008-10-23 05:53:05 1 1 - 1 39.984198 116.319322 2008-10-23 05:53:06 1 1 - 2 39.984224 116.319402 2008-10-23 05:53:11 1 2 - 3 39.984211 116.319389 2008-10-23 05:53:16 2 2 - 4 39.984217 116.319422 2008-10-23 05:53:21 2 2 - >>> print(plot_traj_by_id(move_df_3, '1')) - >>> print(plot_traj_by_id(move_df_3, '2')) + lat lon datetime id + 0 39.984094 116.319236 2008-10-23 05:53:05 1 + 1 39.984198 116.319322 2008-10-23 05:53:06 1 + 2 39.984224 
116.319402 2008-10-23 05:53:11 1 + 3 39.984211 116.319389 2008-10-23 05:53:16 2 + 4 39.984217 116.319422 2008-10-23 05:53:21 2 + >>> plot_traj_by_id(move_df_3, 1, label='id) + >>> plot_traj_by_id(move_df_3, 2, label='id) """ if label not in move_data: raise KeyError('%s feature not in dataframe' % label) @@ -303,10 +288,10 @@ def plot_traj_by_id( plt.plot( df_.iloc[0][LONGITUDE], df_.iloc[0][LATITUDE], 'yo', markersize=markersize - ) # start point + ) plt.plot( df_.iloc[-1][LONGITUDE], df_.iloc[-1][LATITUDE], 'yX', markersize=markersize - ) # end point + ) if save_fig: if not name: From e27e044c8aec3f3f44059bd0b36713c758a3e35e Mon Sep 17 00:00:00 2001 From: flych3r Date: Mon, 12 Jul 2021 12:59:31 -0300 Subject: [PATCH 44/56] stop plot functions from changing dataframe --- pymove/visualization/folium.py | 42 +--------------------------------- 1 file changed, 1 insertion(+), 41 deletions(-) diff --git a/pymove/visualization/folium.py b/pymove/visualization/folium.py index 8c503c4b..cc99da4d 100644 --- a/pymove/visualization/folium.py +++ b/pymove/visualization/folium.py @@ -1169,13 +1169,6 @@ def plot_trajectory_by_period( 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 >>> plot_trajectory_by_period(move_df, period='Early morning') - >>> move_df.head() - lat lon datetime id period - 0 39.984094 116.319236 2008-10-23 05:53:05 1 Early morning - 1 39.984198 116.319322 2008-10-23 05:53:06 1 Early morning - 2 39.984224 116.319402 2008-10-23 05:53:11 1 Early morning - 3 39.984211 116.319389 2008-10-23 05:53:16 1 Early morning - 4 39.984217 116.319422 2008-10-23 05:53:21 1 Early morning """ if base_map is None: base_map = create_base_map( @@ -1281,13 +1274,6 @@ def plot_trajectory_by_day_week( 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 >>> plot_trajectory_by_day_week(move_df, day_week='Friday') - >>> move_df.head() - lat lon datetime id day - 0 39.984094 116.319236 2008-10-23 05:53:05 1 
Thursday - 1 39.984198 116.319322 2008-10-23 05:53:06 1 Thursday - 2 39.984224 116.319402 2008-10-23 05:53:11 1 Thursday - 3 39.984211 116.319389 2008-10-23 05:53:16 1 Thursday - 4 39.984217 116.319422 2008-10-23 05:53:21 1 Thursday """ if base_map is None: base_map = create_base_map( @@ -1400,13 +1386,6 @@ def plot_trajectory_by_date( >>> start_date='2008-10-23 05:53:05', >>> end_date='2008-10-23 23:43:56' >>> ) - >>> move_df.head() - lat lon datetime id date - 0 39.984094 116.319236 2008-10-23 05:53:05 1 2008-10-23 - 1 39.984198 116.319322 2008-10-23 05:53:06 1 2008-10-23 - 2 39.984224 116.319402 2008-10-23 05:53:11 1 2008-10-23 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 2008-10-23 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 2008-10-23 """ if base_map is None: base_map = create_base_map( @@ -1520,13 +1499,7 @@ def plot_trajectory_by_hour( 2 39.984224 116.319402 2008-10-23 05:53:11 1 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> plot_trajectory_by_hour(move_df, start_hour=4,end_hour=6) - lat lon datetime id hour - 0 39.984094 116.319236 2008-10-23 05:53:05 1 5 - 1 39.984198 116.319322 2008-10-23 05:53:06 1 5 - 2 39.984224 116.319402 2008-10-23 05:53:11 1 5 - 3 39.984211 116.319389 2008-10-23 05:53:16 1 5 - 4 39.984217 116.319422 2008-10-23 05:53:21 1 5 + >>> plot_trajectory_by_hour(move_df, start_hour=4, end_hour=6) """ if base_map is None: base_map = create_base_map( @@ -1632,19 +1605,6 @@ def plot_stops( 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 >>> plot_stops(move_df) - >>> move_df.head() - lat lon datetime id \ - dist_to_prev dist_to_next dist_prev_to_next situation - 0 39.984094 116.319236 2008-10-23 05:53:05 1 \ - NaN 13.690153 NaN nan - 1 39.984198 116.319322 2008-10-23 05:53:06 1 \ - 13.690153 7.403788 20.223428 move - 2 39.984224 116.319402 2008-10-23 05:53:11 1 \ - 7.403788 1.821083 5.888579 move - 3 39.984211 116.319389 2008-10-23 05:53:16 1 \ - 
1.821083 2.889671 1.873356 move - 4 39.984217 116.319422 2008-10-23 05:53:21 1 \ - 2.889671 66.555997 68.727260 move """ if base_map is None: base_map = create_base_map( From 1b8c55c6555f54d289dfc6abf2f927d7236867b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor?= Date: Mon, 12 Jul 2021 23:56:53 -0300 Subject: [PATCH 45/56] renaming visualization methods --- pymove/tests/test_visualization_folium.py | 36 ++++++------ pymove/visualization/folium.py | 70 +++++++++++------------ 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/pymove/tests/test_visualization_folium.py b/pymove/tests/test_visualization_folium.py index 300c2f33..07320417 100644 --- a/pymove/tests/test_visualization_folium.py +++ b/pymove/tests/test_visualization_folium.py @@ -460,7 +460,7 @@ def test_filter_generated_feature(): pass -def test_add_begin_end_markers_to_folium_map(tmpdir): +def test_add_begin_end_markers_to_map(tmpdir): move_df = _default_move_df() @@ -471,7 +471,7 @@ def test_add_begin_end_markers_to_folium_map(tmpdir): tile=TILES[0], default_zoom_start=12) - folium._add_begin_end_markers_to_folium_map(move_df, base_map) + folium._add_begin_end_markers_to_map(move_df, base_map) d = tmpdir.mkdir('visualization') @@ -518,7 +518,7 @@ def test_add_begin_end_markers_to_folium_map(tmpdir): '\n{"clusteredMarker":true,"color":"red"}') in map_info) -def test_add_trajectories_to_folium_map(tmpdir): +def test_add_trajectories_to_map(tmpdir): move_df = _default_move_df() d = tmpdir.mkdir('visualization') @@ -580,18 +580,18 @@ def test_add_trajectories_to_folium_map(tmpdir): ) -def test_plot_trajectories_with_folium(tmpdir): +def test_plot_trajectories(tmpdir): move_df = _default_move_df() d = tmpdir.mkdir('visualization') - file_write_default = d.join('plot_trajectories_with_folium.html') + file_write_default = d.join('plot_trajectories.html') filename = os.path.join( file_write_default.dirname, file_write_default.basename ) - base_map = 
folium.plot_trajectories_with_folium( + base_map = folium.plot_trajectories( move_df, n_rows=3, save_as_html=True, @@ -645,18 +645,18 @@ def test_plot_trajectories_with_folium(tmpdir): ) -def test_plot_trajectory_by_id_folium(tmpdir): +def test_plot_trajectory_by_id(tmpdir): move_df = _default_move_df() d = tmpdir.mkdir('visualization') - file_write_default = d.join('plot_trajectory_by_id_folium.html') + file_write_default = d.join('plot_trajectory_by_id.html') filename_write_default = os.path.join( file_write_default.dirname, file_write_default.basename ) - base_map = folium.plot_trajectory_by_id_folium( + base_map = folium.plot_trajectory_by_id( move_df, id_=1, save_as_html=True, @@ -866,18 +866,18 @@ def test_plot_bbox(tmpdir): assert_equal(expected, actual) -def test_plot_point_folium(tmpdir): +def test_plot_point(tmpdir): move_df = _default_move_df() d = tmpdir.mkdir('visualization') - file_write_default = d.join('plot_point_folium.html') + file_write_default = d.join('plot_point.html') filename = os.path.join( file_write_default.dirname, file_write_default.basename ) - base_map = folium.plot_points_folium( + base_map = folium.plot_points( move_data=move_df, user_lat=LATITUDE, user_lon=LONGITUDE, @@ -936,18 +936,18 @@ def test_plot_point_folium(tmpdir): ) -def test_plot_poi_folium(tmpdir): +def test_plot_poi(tmpdir): move_df = _default_move_df() d = tmpdir.mkdir('visualization') - file_write_default = d.join('plot_point_folium.html') + file_write_default = d.join('plot_point.html') filename = os.path.join( file_write_default.dirname, file_write_default.basename ) - base_map = folium.plot_poi_folium( + base_map = folium.plot_poi( move_data=move_df, poi_lat=LATITUDE, poi_lon=LONGITUDE, @@ -1005,18 +1005,18 @@ def test_plot_poi_folium(tmpdir): ) -def test_plot_event_folium(tmpdir): +def test_plot_event(tmpdir): move_df = _default_move_df() d = tmpdir.mkdir('visualization') - file_write_default = d.join('plot_event_folium.html') + file_write_default = 
d.join('plot_event.html') filename = os.path.join( file_write_default.dirname, file_write_default.basename ) - base_map = folium.plot_event_folium( + base_map = folium.plot_event( move_data=move_df, event_lat=LATITUDE, event_lon=LONGITUDE, diff --git a/pymove/visualization/folium.py b/pymove/visualization/folium.py index 2755e34e..36c370b0 100644 --- a/pymove/visualization/folium.py +++ b/pymove/visualization/folium.py @@ -792,7 +792,7 @@ def _filter_generated_feature( return mv_df -def _add_begin_end_markers_to_folium_map( +def _add_begin_end_markers_to_map( move_data: DataFrame, base_map: Map, color: Optional[Text] = None, @@ -817,7 +817,7 @@ def _add_begin_end_markers_to_folium_map( Examples -------- - >>> from pymove.visualization.folium import _add_begin_end_markers_to_folium_map + >>> from pymove.visualization.folium import _add_begin_end_markers_to_map >>> move_df.head() lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 @@ -826,7 +826,7 @@ def _add_begin_end_markers_to_folium_map( 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 >>> map = create_base_map(move_df) - >>> _add_begin_end_markers_to_folium_map(move_df, map) + >>> _add_begin_end_markers_to_map(move_df, map) """ points = folium.map.FeatureGroup( 'The start and end points of trajectory {}'.format(_id or '') @@ -855,7 +855,7 @@ def _add_begin_end_markers_to_folium_map( base_map.add_child(points) -def _add_trajectories_to_folium_map( +def _add_trajectories_to_map( move_data: DataFrame, items: Sequence[Tuple], base_map: Map, @@ -881,14 +881,14 @@ def _add_trajectories_to_folium_map( Examples -------- - >>> from pymove.visualization.folium import _add_trajectories_to_folium_map + >>> from pymove.visualization.folium import _add_trajectories_to_map >>> move_df lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 1 39.984198 116.319322 2008-10-23 05:53:06 1 3 39.988118 116.326672 2008-10-25 14:39:19 5 4 39.987965 116.326675 2008-10-25 
14:39:24 5 - >>> _add_trajectories_to_folium_map( + >>> _add_trajectories_to_map( >>> move_data=move_df, >>> base_map=map1, >>> items=[(1, 'red'), [5, 'green']] @@ -897,7 +897,7 @@ def _add_trajectories_to_folium_map( for _id, color in items: mv = move_data[move_data[TRAJ_ID] == _id] - _add_begin_end_markers_to_folium_map(mv, base_map, color, _id) + _add_begin_end_markers_to_map(mv, base_map, color, _id) folium.PolyLine( mv[[LATITUDE, LONGITUDE]], color=color, weight=2.5, opacity=1 @@ -912,7 +912,7 @@ def _add_trajectories_to_folium_map( base_map.save(outfile=filename) -def plot_trajectories_with_folium( +def plot_trajectories( move_data: DataFrame, n_rows: Optional[int] = None, lat_origin: Optional[float] = None, @@ -924,7 +924,7 @@ def plot_trajectories_with_folium( save_as_html: bool = False, color: Optional[Union[Text, List[Text]]] = None, color_by_id: Optional[Dict] = None, - filename: Text = 'plot_trajectories_with_folium.html', + filename: Text = 'plot_trajectories.html', ) -> Map: """ Generate visualization of all trajectories with folium. @@ -961,7 +961,7 @@ def plot_trajectories_with_folium( by default None. filename : str, optional Represents the file name of new file .html, - by default 'plot_trajectory_with_folium.html'. + by default 'plot_trajectory.html'. 
Returns ------- @@ -970,7 +970,7 @@ def plot_trajectories_with_folium( Examples -------- - >>> from pymove.visualization.folium import plot_trajectories_with_folium + >>> from pymove.visualization.folium import plot_trajectories >>> move_df.head() lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 @@ -978,7 +978,7 @@ def plot_trajectories_with_folium( 2 39.984224 116.319402 2008-10-23 05:53:11 1 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> plot_trajectories_with_folium(move_df) + >>> plot_trajectories(move_df) """ if base_map is None: base_map = create_base_map( @@ -993,14 +993,14 @@ def plot_trajectories_with_folium( move_data, n_rows=n_rows, color=color, color_by_id=color_by_id ) - _add_trajectories_to_folium_map( + _add_trajectories_to_map( mv_df, items, base_map, legend, save_as_html, filename ) return base_map -def plot_trajectory_by_id_folium( +def plot_trajectory_by_id( move_data: DataFrame, id_: int, n_rows: Optional[int] = None, @@ -1012,7 +1012,7 @@ def plot_trajectory_by_id_folium( tile: Text = TILES[0], save_as_html: bool = False, color: Optional[Union[Text, List[Text]]] = None, - filename: Text = 'plot_trajectories_with_folium.html', + filename: Text = 'plot_trajectories.html', ) -> Map: """ Generate visualization of all trajectories with folium. @@ -1048,7 +1048,7 @@ def plot_trajectory_by_id_folium( Can be a single color name, a list of colors or a colormap name, by default None. filename : str, optional Represents the file name of new file .html, - by default 'plot_trajectory_by_id_with_folium.html'. + by default 'plot_trajectory_by_id.html'. 
Returns ------- @@ -1062,7 +1062,7 @@ def plot_trajectory_by_id_folium( Examples -------- - >>> from pymove.visualization.folium import plot_trajectory_by_id_folium + >>> from pymove.visualization.folium import plot_trajectory_by_id >>> move_df.head() lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 @@ -1070,7 +1070,7 @@ def plot_trajectory_by_id_folium( 2 39.984224 116.319402 2008-10-23 05:53:11 1 3 39.984211 116.319389 2008-10-23 05:53:16 2 4 39.984217 116.319422 2008-10-23 05:53:21 2 - >>> plot_trajectory_by_id_folium(move_df, id_=1) + >>> plot_trajectory_by_id(move_df, id_=1) """ if base_map is None: base_map = create_base_map( @@ -1082,7 +1082,7 @@ def plot_trajectory_by_id_folium( ) mv_df, items = _filter_and_generate_colors(move_data, id_, n_rows, color) - _add_trajectories_to_folium_map( + _add_trajectories_to_map( mv_df, items, base_map, legend, save_as_html, filename ) @@ -1191,7 +1191,7 @@ def plot_trajectory_by_period( mv_df = _filter_generated_feature(move_data, PERIOD, [period]) mv_df, items = _filter_and_generate_colors(mv_df, id_, n_rows, color, color_by_id) - _add_trajectories_to_folium_map( + _add_trajectories_to_map( mv_df, items, base_map, legend, save_as_html, filename ) @@ -1300,7 +1300,7 @@ def plot_trajectory_by_day_week( mv_df = _filter_generated_feature(move_data, DAY, [day_week]) mv_df, items = _filter_and_generate_colors(mv_df, id_, n_rows, color, color_by_id) - _add_trajectories_to_folium_map( + _add_trajectories_to_map( mv_df, items, base_map, legend, save_as_html, filename ) @@ -1422,7 +1422,7 @@ def plot_trajectory_by_date( mv_df = _filter_generated_feature(move_data, DATE, [start_date, end_date]) mv_df, items = _filter_and_generate_colors(mv_df, id_, n_rows, color, color_by_id) - _add_trajectories_to_folium_map( + _add_trajectories_to_map( mv_df, items, base_map, legend, save_as_html, filename ) @@ -1533,7 +1533,7 @@ def plot_trajectory_by_hour( mv_df = _filter_generated_feature(move_data, HOUR, [start_hour, 
end_hour]) mv_df, items = _filter_and_generate_colors(mv_df, id_, n_rows, color, color_by_id) - _add_trajectories_to_folium_map( + _add_trajectories_to_map( mv_df, items, base_map, legend, save_as_html, filename ) @@ -1748,7 +1748,7 @@ def _format_tags(line: Union[List, Dict], slice_: List) -> Text: Examples -------- - >>> from pymove.visualization.folium import _format_tags, plot_points_folium + >>> from pymove.visualization.folium import _format_tags, plot_points >>> move_df.head() lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 @@ -1838,7 +1838,7 @@ def _circle_maker( ).add_to(map_) -def plot_points_folium( +def plot_points( move_data: DataFrame, user_lat: Text = LATITUDE, user_lon: Text = LONGITUDE, @@ -1885,7 +1885,7 @@ def plot_points_folium( Examples -------- - >>> from pymove.visualization.folium import plot_points_folium + >>> from pymove.visualization.folium import plot_points >>> move_df.head() lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 @@ -1893,7 +1893,7 @@ def plot_points_folium( 2 39.984224 116.319402 2008-10-23 05:53:11 1 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> plot_points_folium(move_df) + >>> plot_points(move_df) """ if slice_tags is None: slice_tags = list(move_data.columns) @@ -1925,7 +1925,7 @@ def plot_points_folium( return base_map -def plot_poi_folium( +def plot_poi( move_data: DataFrame, poi_lat: Text = LATITUDE, poi_lon: Text = LONGITUDE, @@ -1970,7 +1970,7 @@ def plot_poi_folium( Examples -------- - >>> from pymove.visualization.folium import plot_poi_folium + >>> from pymove.visualization.folium import plot_poi >>> move_df.head() lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 @@ -1978,9 +1978,9 @@ def plot_poi_folium( 2 39.984224 116.319402 2008-10-23 05:53:11 1 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> plot_poi_folium(move_df) + >>> plot_poi(move_df) """ - return 
plot_points_folium( + return plot_points( move_data, user_lat=poi_lat, user_lon=poi_lon, @@ -1994,7 +1994,7 @@ def plot_poi_folium( ) -def plot_event_folium( +def plot_event( move_data: DataFrame, event_lat: Text = LATITUDE, event_lon: Text = LONGITUDE, @@ -2036,7 +2036,7 @@ def plot_event_folium( Examples -------- - >>> from pymove.visualization.folium import plot_event_folium + >>> from pymove.visualization.folium import plot_event >>> move_df.head() lat lon datetime id 0 39.984094 116.319236 2008-10-23 05:53:05 1 @@ -2044,9 +2044,9 @@ def plot_event_folium( 2 39.984224 116.319402 2008-10-23 05:53:11 1 3 39.984211 116.319389 2008-10-23 05:53:16 1 4 39.984217 116.319422 2008-10-23 05:53:21 1 - >>> plot_event_folium(move_df) + >>> plot_event(move_df) """ - return plot_points_folium( + return plot_points( move_data, user_lat=event_lat, user_lon=event_lon, From 41742509cd28031674e3ad94f1e09b9b3cb137d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Victor=20Carvalho?= Date: Tue, 13 Jul 2021 00:03:18 -0300 Subject: [PATCH 46/56] a little change --- pymove/visualization/folium.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pymove/visualization/folium.py b/pymove/visualization/folium.py index 36c370b0..66408094 100644 --- a/pymove/visualization/folium.py +++ b/pymove/visualization/folium.py @@ -8,17 +8,17 @@ cluster, faster_cluster, plot_markers, -plot_trajectories_with_folium, -plot_trajectory_by_id_folium, +plot_trajectories, +plot_trajectory_by_id, plot_trajectory_by_period, plot_trajectory_by_day_week, plot_trajectory_by_date, plot_trajectory_by_hour, plot_stops, plot_bbox, -plot_points_folium, -plot_poi_folium, -plot_event_folium, +plot_points, +plot_poi, +plot_event, plot_traj_timestamp_geo_json """ From d8e84d1cebf35c64ef004ce5ac31b08c6724d307 Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 13 Jul 2021 16:59:16 -0300 Subject: [PATCH 47/56] renamed matplotlib.plot_traj_id --- pymove/tests/test_visualization_matplotlib.py | 7 
+++-- pymove/visualization/matplotlib.py | 30 +++++++++---------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/pymove/tests/test_visualization_matplotlib.py b/pymove/tests/test_visualization_matplotlib.py index 897ebded..8b4e49f6 100644 --- a/pymove/tests/test_visualization_matplotlib.py +++ b/pymove/tests/test_visualization_matplotlib.py @@ -50,7 +50,8 @@ def test_show_object_id_by_date(tmpdir): mpl.show_object_id_by_date( move_data=move_df, - name=filename_write_default + name=filename_write_default, + save_fig=True ) test_dir = os.path.abspath(os.path.dirname(__file__)) @@ -64,7 +65,7 @@ def test_show_object_id_by_date(tmpdir): ) -def test_plot_traj_by_id(tmpdir): +def test_plot_trajectory_by_id(tmpdir): move_df = _default_move_df() move_df[TID] = ['1', '1', '2', '2', '2'] @@ -75,7 +76,7 @@ def test_plot_traj_by_id(tmpdir): file_write_default.dirname, file_write_default.basename ) - mpl.plot_traj_by_id(move_df, '1', save_fig=True, name=filename_write_default) + mpl.plot_trajectory_by_id(move_df, '1', save_fig=True, name=filename_write_default) test_dir = os.path.abspath(os.path.dirname(__file__)) data_dir = os.path.join(test_dir, 'baseline/traj_id.png') diff --git a/pymove/visualization/matplotlib.py b/pymove/visualization/matplotlib.py index 8b526db7..ec45a4dd 100644 --- a/pymove/visualization/matplotlib.py +++ b/pymove/visualization/matplotlib.py @@ -3,7 +3,7 @@ show_object_id_by_date, plot_trajectories, -plot_traj_by_id, +plot_trajectory_by_id, plot_all_features plot_coords, plot_bounds, @@ -39,8 +39,8 @@ def show_object_id_by_date( move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], kind: Optional[List] = None, figsize: Tuple[float, float] = (21, 9), - return_fig: bool = True, - save_fig: bool = True, + return_fig: bool = False, + save_fig: bool = False, name: Text = 'shot_points_by_date.png', ) -> Optional[figure]: """ @@ -60,9 +60,9 @@ def show_object_id_by_date( figsize : tuple, optional Represents dimensions of figure, by 
default (21,9). return_fig : bool, optional - Represents whether or not to save the generated picture, by default True. + Represents whether or not to save the generated picture, by default False. save_fig : bool, optional - Represents whether or not to save the generated picture, by default True. + Represents whether or not to save the generated picture, by default False. name : String, optional Represents name of a file, by default 'shot_points_by_date.png'. @@ -131,8 +131,8 @@ def plot_trajectories( markers: Text = 'o', markersize: float = 12, figsize: Tuple[float, float] = (10, 10), - return_fig: bool = True, - save_fig: bool = True, + return_fig: bool = False, + save_fig: bool = False, name: Text = 'trajectories.png', ) -> Optional[figure]: """ @@ -149,7 +149,7 @@ def plot_trajectories( figsize : tuple(float, float), optional Represents dimensions of figure, by default (10, 10) return_fig : bool, optional - Represents whether or not to return the generated picture, by default True + Represents whether or not to return the generated picture, by default False save_fig : bool, optional Represents whether or not to save the generated picture, by default False name : str, optional @@ -191,7 +191,7 @@ def plot_trajectories( return fig -def plot_traj_by_id( +def plot_trajectory_by_id( move_data: DataFrame, id_: Union[int, Text], label: Text = TID, @@ -200,8 +200,8 @@ def plot_traj_by_id( linewidth: float = 3, markersize: float = 20, figsize: Tuple[float, float] = (10, 10), - return_fig: bool = True, - save_fig: bool = True, + return_fig: bool = False, + save_fig: bool = False, name: Optional[Text] = None, ) -> Optional[figure]: """ @@ -226,7 +226,7 @@ def plot_traj_by_id( figsize : tuple(float, float), optional Represents dimensions of figure, by default (10, 10) return_fig : bool, optional - Represents whether or not to return the generated picture, by default True + Represents whether or not to return the generated picture, by default False save_fig : bool, 
optional Represents whether or not to save the generated picture, by default False name : str, optional @@ -306,8 +306,8 @@ def plot_all_features( move_data: DataFrame, dtype: Callable = float, figsize: Tuple[float, float] = (21, 15), - return_fig: bool = True, - save_fig: bool = True, + return_fig: bool = False, + save_fig: bool = False, name: Text = 'features.png', ) -> Optional[figure]: """ @@ -322,7 +322,7 @@ def plot_all_features( figsize : tuple(float, float), optional Represents dimensions of figure, by default (21, 15) return_fig : bool, optional - Represents whether or not to return the generated picture, by default True + Represents whether or not to return the generated picture, by default False save_fig : bool, optional Represents whether or not to save the generated picture, by default False name : str, optional From 2e30dd5b6aa02ed4679019e4145e786c8254752e Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 13 Jul 2021 17:57:02 -0300 Subject: [PATCH 48/56] changed typing style to python 3.10 --- pymove/core/dask.py | 19 +- pymove/core/dataframe.py | 19 +- pymove/core/grid.py | 100 +----- pymove/core/pandas.py | 195 +++++------ pymove/core/pandas_discrete.py | 43 ++- pymove/models/pattern_mining/clustering.py | 17 +- pymove/preprocessing/compression.py | 13 +- pymove/preprocessing/filters.py | 83 ++--- pymove/preprocessing/segmentation.py | 55 +-- pymove/preprocessing/stay_point_detection.py | 21 +- pymove/query/query.py | 27 +- pymove/semantic/semantic.py | 71 ++-- pymove/tests/test_visualization_matplotlib.py | 2 +- pymove/utils/conversions.py | 106 +++--- pymove/utils/data_augmentation.py | 27 +- pymove/utils/datetime.py | 44 +-- pymove/utils/distances.py | 36 +- pymove/utils/geoutils.py | 19 +- pymove/utils/integration.py | 90 ++--- pymove/utils/log.py | 23 +- pymove/utils/math.py | 18 +- pymove/utils/mem.py | 22 +- pymove/utils/trajectories.py | 38 +-- pymove/utils/visual.py | 19 +- pymove/visualization/folium.py | 314 +++++++++--------- 
pymove/visualization/matplotlib.py | 122 +++++-- 26 files changed, 773 insertions(+), 770 deletions(-) diff --git a/pymove/core/dask.py b/pymove/core/dask.py index 99f654de..05c15a90 100644 --- a/pymove/core/dask.py +++ b/pymove/core/dask.py @@ -1,6 +1,7 @@ """DaskMoveDataFrame class.""" +from __future__ import annotations -from typing import TYPE_CHECKING, Dict, List, Text, Union +from typing import TYPE_CHECKING import dask import numpy as np @@ -27,11 +28,11 @@ class DaskMoveDataFrame(DataFrame, MoveDataFrameAbstractModel): def __init__( self, - data: Union[DataFrame, List, Dict], - latitude: Text = LATITUDE, - longitude: Text = LONGITUDE, - datetime: Text = DATETIME, - traj_id: Text = TRAJ_ID, + data: DataFrame | list | dict, + latitude: str = LATITUDE, + longitude: str = LONGITUDE, + datetime: str = DATETIME, + traj_id: str = TRAJ_ID, n_partitions: int = 1, ): """ @@ -501,8 +502,8 @@ def to_csv(self, *args, **kwargs): raise NotImplementedError('To be implemented') def convert_to( - self, new_type: Text - ) -> Union[MoveDataFrame, 'PandasMoveDataFrame', 'DaskMoveDataFrame']: + self, new_type: str + ) -> MoveDataFrame | 'PandasMoveDataFrame' | 'DaskMoveDataFrame': """ Convert an object from one type to another specified by the user. @@ -530,7 +531,7 @@ def convert_to( else: return self - def get_type(self) -> Text: + def get_type(self) -> str: """ Returns the type of the object. 
diff --git a/pymove/core/dataframe.py b/pymove/core/dataframe.py index 3e586116..ce92cd8a 100644 --- a/pymove/core/dataframe.py +++ b/pymove/core/dataframe.py @@ -1,6 +1,5 @@ """MoveDataFrame class.""" - -from typing import Dict, List, Text, Union +from __future__ import annotations from dateutil.parser._parser import ParserError from pandas.core.frame import DataFrame @@ -21,12 +20,12 @@ class MoveDataFrame: @staticmethod def __new__( self, - data: Union[DataFrame, Dict, List], - latitude: Text = LATITUDE, - longitude: Text = LONGITUDE, - datetime: Text = DATETIME, - traj_id: Text = TRAJ_ID, - type_: Text = TYPE_PANDAS, + data: DataFrame | dict | list, + latitude: str = LATITUDE, + longitude: str = LONGITUDE, + datetime: str = DATETIME, + traj_id: str = TRAJ_ID, + type_: str = TYPE_PANDAS, n_partitions: int = 1, ): """ @@ -125,8 +124,8 @@ def validate_move_data_frame(data: DataFrame): @staticmethod def format_labels( - current_id: Text, current_lat: Text, current_lon: Text, current_datetime: Text - ) -> Dict: + current_id: str, current_lat: str, current_lon: str, current_datetime: str + ) -> dict: """ Format the labels for the PyMove lib pattern labels output lat, lon and datatime. 
diff --git a/pymove/core/grid.py b/pymove/core/grid.py index a29c65a7..f42c782a 100644 --- a/pymove/core/grid.py +++ b/pymove/core/grid.py @@ -1,12 +1,11 @@ """Grid class.""" +from __future__ import annotations import math -from typing import Callable, Dict, Optional, Text, Tuple, Union +from typing import Callable import joblib -import matplotlib.pyplot as plt import numpy as np -from matplotlib.pyplot import figure from pandas import DataFrame from shapely.geometry import Polygon @@ -17,7 +16,6 @@ INDEX_GRID_LON, LATITUDE, LONGITUDE, - POLYGON, TRAJ_ID, ) from pymove.utils.conversions import lat_meters @@ -30,9 +28,9 @@ class Grid: def __init__( self, - data: Union[DataFrame, Dict], - cell_size: Optional[float] = None, - meters_by_degree: Optional[float] = None + data: DataFrame | dict, + cell_size: float | None = None, + meters_by_degree: float | None = None ): """ Creates a virtual grid from the trajectories. @@ -58,7 +56,7 @@ def __init__( ValueError If one of data or cell grid is not provided """ - self.last_operation: Dict = dict() + self.last_operation: dict = dict() if meters_by_degree is None: meters_by_degree = lat_meters(-3.71839) if isinstance(data, dict): @@ -69,7 +67,7 @@ def __init__( raise ValueError('Must pass either data or cell size.') self.grid_polygon = None - def get_grid(self) -> Dict: + def get_grid(self) -> dict: """ Returns the grid object in a dict format. @@ -91,7 +89,7 @@ def get_grid(self) -> Dict: 'cell_size_by_degree': self.cell_size_by_degree, } - def _grid_from_dict(self, dict_grid: Dict): + def _grid_from_dict(self, dict_grid: dict): """ Coverts the dict grid to a Grid object. @@ -218,8 +216,8 @@ def create_update_index_grid_feature( def convert_two_index_grid_to_one( self, data: DataFrame, - label_grid_lat: Text = INDEX_GRID_LAT, - label_grid_lon: Text = INDEX_GRID_LON, + label_grid_lat: str = INDEX_GRID_LAT, + label_grid_lon: str = INDEX_GRID_LON, ): """ Converts grid lat-lon ids to unique values. 
@@ -241,7 +239,7 @@ def convert_two_index_grid_to_one( def convert_one_index_grid_to_two( self, data: DataFrame, - label_grid_index: Text = INDEX_GRID, + label_grid_index: str = INDEX_GRID, ): """ Converts grid lat-lon ids to unique values. @@ -360,7 +358,7 @@ def create_all_polygons_to_all_point_on_grid( self.last_operation = end_operation(operation) return datapolygons - def point_to_index_grid(self, event_lat: float, event_lon: float) -> Tuple[int, int]: + def point_to_index_grid(self, event_lat: float, event_lon: float) -> tuple[int, int]: """ Locate the coordinates x and y in a grid of point (lat, long). @@ -394,7 +392,7 @@ def point_to_index_grid(self, event_lat: float, event_lon: float) -> Tuple[int, return indexes_lat_y, indexes_lon_x - def save_grid_pkl(self, filename: Text): + def save_grid_pkl(self, filename: str): """ Save a grid with new file .pkl. @@ -409,7 +407,7 @@ def save_grid_pkl(self, filename: Text): joblib.dump(self.get_grid(), f) self.last_operation = end_operation(operation) - def read_grid_pkl(self, filename: Text) -> 'Grid': + def read_grid_pkl(self, filename: str) -> 'Grid': """ Read grid dict from a file .pkl. @@ -431,74 +429,6 @@ def read_grid_pkl(self, filename: Text) -> 'Grid': self.last_operation = end_operation(operation) return grid - def show_grid_polygons( - self, - data: DataFrame, - markersize: float = 10, - linewidth: float = 2, - figsize: Tuple[int, int] = (10, 10), - return_fig: bool = True, - save_fig: bool = False, - name: Text = 'grid.png', - ) -> Optional[figure]: - """ - Generate a visualization with grid polygons. 
- - Parameters - ---------- - data : DataFrame - Input trajectory data - markersize : float, optional - Represents visualization size marker, by default 10 - linewidth : float, optional - Represents visualization size line, by default 2 - figsize : tuple(int, int), optional - Represents the size (float: width, float: height) of a figure, - by default (10, 10) - return_fig : bool, optional - Represents whether or not to save the generated picture, by default True - save_fig : bool, optional - Wether to save the figure, by default False - name : str, optional - Represents name of a file, by default 'grid.png' - - Returns - ------- - figure - The generated picture or None - - Raises - ------ - If the dataframe does not contains the POLYGON feature - IndexError - If there is no user with the id passed - - """ - if POLYGON not in data: - raise KeyError('POLYGON feature not in dataframe') - - data.dropna(subset=[POLYGON], inplace=True) - - operation = begin_operation('show_grid_polygons') - - fig = plt.figure(figsize=figsize) - - for _, row in data.iterrows(): - xs, ys = row[POLYGON].exterior.xy - plt.plot(ys, xs, 'g', linewidth=linewidth, markersize=markersize) - xs_start, ys_start = data.iloc[0][POLYGON].exterior.xy - xs_end, ys_end = data.iloc[-1][POLYGON].exterior.xy - plt.plot(ys_start, xs_start, 'bo', markersize=markersize * 1.5) - plt.plot(ys_end, xs_end, 'bX', markersize=markersize * 1.5) # start point - - if save_fig: - plt.savefig(fname=name) - - self.last_operation = end_operation(operation) - - if return_fig: - return fig - def __repr__(self) -> str: """ String representation of grid. 
@@ -512,5 +442,5 @@ def __repr__(self) -> str: grid_size_lon_x: grid longitude size cell_size_by_degree: grid cell size """ - text = ['{}: {}'.format(k, v) for k, v in self.get_grid().items()] + text = [f'{k}: {v}' for k, v in self.get_grid().items()] return '\n'.join(text) diff --git a/pymove/core/pandas.py b/pymove/core/pandas.py index 0f0bf91c..e5da791d 100644 --- a/pymove/core/pandas.py +++ b/pymove/core/pandas.py @@ -1,6 +1,7 @@ """PandasMoveDataFrame class.""" +from __future__ import annotations -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Text, Tuple, Union +from typing import TYPE_CHECKING, Any, Callable import numpy as np from pandas import DataFrame, DateOffset, Series, Timedelta @@ -54,11 +55,11 @@ class PandasMoveDataFrame(DataFrame): def __init__( self, - data: Union[DataFrame, List, Dict], - latitude: Text = LATITUDE, - longitude: Text = LONGITUDE, - datetime: Text = DATETIME, - traj_id: Text = TRAJ_ID, + data: DataFrame | list | dict, + latitude: str = LATITUDE, + longitude: str = LONGITUDE, + datetime: str = DATETIME, + traj_id: str = TRAJ_ID, ): """ Checks whether past data has 'lat', 'lon', 'datetime' columns. 
@@ -113,9 +114,9 @@ def __init__( if MoveDataFrame.has_columns(tdf): MoveDataFrame.validate_move_data_frame(tdf) - super(PandasMoveDataFrame, self).__init__(tdf) + super().__init__(tdf) self._type = TYPE_PANDAS - self.last_operation: Dict = dict() + self.last_operation: dict = dict() else: raise KeyError( @@ -190,13 +191,13 @@ def datetime(self): def rename( self, - mapper: Optional[Union[Dict, Callable]] = None, - index: Optional[Union[Dict, Callable]] = None, - columns: Optional[Union[Dict, Callable]] = None, - axis: Optional[Union[int, Text]] = None, + mapper: dict | Callable | None = None, + index: dict | Callable | None = None, + columns: dict | Callable | None = None, + axis: int | str | None = None, copy: bool = True, inplace: bool = False - ) -> Optional[Union['PandasMoveDataFrame', DataFrame]]: + ) -> 'PandasMoveDataFrame' | DataFrame | None: """ Alter axes labels. @@ -245,7 +246,7 @@ def rename( if inplace: if MoveDataFrame.has_columns(rename_): self._mgr = rename_._mgr - self._item_cache: Dict = dict() + self._item_cache: dict = dict() rename_ = None else: raise AttributeError( @@ -352,7 +353,7 @@ def get_users_number(self) -> int: def to_grid( self, cell_size: float, - meters_by_degree: Optional[float] = None + meters_by_degree: float | None = None ) -> Grid: """ Converts trajectory data to grid format. @@ -394,7 +395,7 @@ def to_data_frame(self) -> DataFrame: return DataFrame(self) def to_dicrete_move_df( - self, local_label: Text = LOCAL_LABEL + self, local_label: str = LOCAL_LABEL ) -> 'PandasMoveDataFrame': """ Generate a discrete dataframe move. 
@@ -413,7 +414,7 @@ def to_dicrete_move_df( if local_label not in self: raise ValueError( - 'columns {} not in df'.format(local_label) + f'columns {local_label} not in df' ) self.last_operation = end_operation(operation) @@ -464,10 +465,10 @@ def copy(self, deep: bool = True) -> 'PandasMoveDataFrame': def generate_tid_based_on_id_datetime( self, - str_format: Text = '%Y%m%d%H', + str_format: str = '%Y%m%d%H', sort: bool = True, inplace: bool = True - ) -> Optional['PandasMoveDataFrame']: + ) -> 'PandasMoveDataFrame' | None: """ Create or update trajectory id based on id and datetime. @@ -513,7 +514,7 @@ def generate_tid_based_on_id_datetime( def generate_date_features( self, inplace: bool = True - ) -> Optional['PandasMoveDataFrame']: + ) -> 'PandasMoveDataFrame' | None: """ Create or update date feature based on datetime. @@ -546,7 +547,7 @@ def generate_date_features( def generate_hour_features( self, inplace: bool = True - ) -> Optional['PandasMoveDataFrame']: + ) -> 'PandasMoveDataFrame' | None: """ Create or update hour features based on datetime. @@ -579,7 +580,7 @@ def generate_hour_features( def generate_day_of_the_week_features( self, inplace: bool = True - ) -> Optional['PandasMoveDataFrame']: + ) -> 'PandasMoveDataFrame' | None: """ Create or update day of the week features based on datetime. @@ -613,7 +614,7 @@ def generate_weekend_features( self, create_day_of_week: bool = False, inplace: bool = True - ) -> Optional['PandasMoveDataFrame']: + ) -> 'PandasMoveDataFrame' | None: """ Adds information to rows determining if it is a weekend day. @@ -661,7 +662,7 @@ def generate_weekend_features( def generate_time_of_day_features( self, inplace: bool = True - ) -> Optional['PandasMoveDataFrame']: + ) -> 'PandasMoveDataFrame' | None: """ Create or update time of day features based on datetime. 
@@ -719,9 +720,9 @@ def generate_time_of_day_features( def generate_datetime_in_format_cyclical( self, - label_datetime: Text = DATETIME, + label_datetime: str = DATETIME, inplace: bool = True - ) -> Optional['PandasMoveDataFrame']: + ) -> 'PandasMoveDataFrame' | None: """ Create or update column with cyclical datetime feature. @@ -763,8 +764,8 @@ def generate_datetime_in_format_cyclical( @staticmethod def _prepare_generate_data( - data_: DataFrame, sort: bool, label_id: Text - ) -> Tuple[Any, int, None]: + data_: DataFrame, sort: bool, label_id: str + ) -> tuple[Any, int, None]: """ Processes the data and create variables for generate methods. @@ -807,11 +808,11 @@ def _prepare_generate_data( def generate_dist_time_speed_features( self, - label_id: Text = TRAJ_ID, + label_id: str = TRAJ_ID, label_dtype: Callable = np.float64, sort: bool = True, inplace: bool = True - ) -> Optional['PandasMoveDataFrame']: + ) -> 'PandasMoveDataFrame' | None: """ Adds distance, time and speed information to the dataframe. @@ -903,11 +904,11 @@ def generate_dist_time_speed_features( def generate_dist_features( self, - label_id: Text = TRAJ_ID, + label_id: str = TRAJ_ID, label_dtype: Callable = np.float64, sort: bool = True, inplace: bool = True - ) -> Optional['PandasMoveDataFrame']: + ) -> 'PandasMoveDataFrame' | None: """ Create the three distance in meters to an GPS point P. @@ -991,11 +992,11 @@ def generate_dist_features( def generate_time_features( self, - label_id: Text = TRAJ_ID, + label_id: str = TRAJ_ID, label_dtype: Callable = np.float64, sort: bool = True, inplace: bool = True - ) -> Optional['PandasMoveDataFrame']: + ) -> 'PandasMoveDataFrame' | None: """ Create the three time in seconds to an GPS point P. 
@@ -1070,11 +1071,11 @@ def generate_time_features( def generate_speed_features( self, - label_id: Text = TRAJ_ID, + label_id: str = TRAJ_ID, label_dtype: Callable = np.float64, sort: bool = True, inplace: bool = True - ) -> Optional['PandasMoveDataFrame']: + ) -> 'PandasMoveDataFrame' | None: """ Create the three speed in meter by seconds to an GPS point P. @@ -1145,7 +1146,7 @@ def generate_speed_features( def generate_move_and_stop_by_radius( self, radius: float = 0, - target_label: Text = DIST_TO_PREV, + target_label: str = DIST_TO_PREV, inplace: bool = True ): """ @@ -1208,7 +1209,7 @@ def time_interval(self) -> Timedelta: return time_diff - def get_bbox(self) -> Tuple[float, float, float, float]: + def get_bbox(self) -> tuple[float, float, float, float]: """ Returns the bounding box of the dataframe. @@ -1291,7 +1292,7 @@ def show_trajectories_info(self): if LATITUDE and LONGITUDE in self: print( - 'Bounding Box:%s\n' % (self.get_bbox(),) + f'Bounding Box:{self.get_bbox()}\n' ) # bbox return = Lat_min , Long_min, Lat_max, Long_max if TIME_TO_PREV in self: @@ -1326,9 +1327,9 @@ def show_trajectories_info(self): def astype( self, - dtype: Union[Callable, Dict], + dtype: Callable | dict, copy: bool = True, - errors: Text = 'raise' + errors: str = 'raise' ) -> DataFrame: """ Cast a pandas object to a specified dtype. @@ -1382,13 +1383,13 @@ def astype( def sort_values( self, - by: Union[Text, List[Text]], + by: str | list[str], axis: int = 0, ascending: bool = True, inplace: bool = False, - kind: Text = 'quicksort', - na_position: Text = 'last', - ) -> Optional['PandasMoveDataFrame']: + kind: str = 'quicksort', + na_position: str = 'last', + ) -> 'PandasMoveDataFrame' | None: """ Sorts the values of the _data, along an axis. 
@@ -1441,12 +1442,12 @@ def sort_values( def reset_index( self, - level: Optional[Union[int, Text, Tuple, List]] = None, + level: int | str | tuple | list | None = None, drop: bool = False, inplace: bool = False, - col_level: Union[int, Text] = 0, - col_fill: Text = '' - ) -> Optional['PandasMoveDataFrame']: + col_level: int | str = 0, + col_fill: str = '' + ) -> 'PandasMoveDataFrame' | None: """ Resets the DataFrame's index, and use the default one. @@ -1489,12 +1490,12 @@ def reset_index( def set_index( self, - keys: Union[Text, List[Text]], + keys: str | list[str], drop: bool = True, append: bool = False, inplace: bool = False, verify_integrity: bool = False, - ) -> Optional[Union['PandasMoveDataFrame', DataFrame]]: + ) -> 'PandasMoveDataFrame' | DataFrame | None: """ Set the DataFrame index (row labels) using one or more existing columns or arrays. @@ -1558,14 +1559,14 @@ def set_index( def drop( self, - labels: Optional[Union[Text, List[Text]]] = None, - axis: Union[int, Text] = 0, - index: Optional[Union[Text, List[Text]]] = None, - columns: Optional[Union[Text, List[Text]]] = None, - level: Optional[Union[int, Text]] = None, + labels: str | list[str] | None = None, + axis: int | str = 0, + index: str | list[str] | None = None, + columns: str | list[str] | None = None, + level: int | str | None = None, inplace: bool = False, - errors: Text = 'raise', - ) -> Optional[Union['PandasMoveDataFrame', DataFrame]]: + errors: str = 'raise', + ) -> 'PandasMoveDataFrame' | DataFrame | None: """ Removes rows or columns. @@ -1651,10 +1652,10 @@ def drop( def drop_duplicates( self, - subset: Optional[Union[int, Text]] = None, - keep: Union[Text, bool] = 'first', + subset: int | str | None = None, + keep: str | bool = 'first', inplace: bool = False - ) -> Optional['PandasMoveDataFrame']: + ) -> 'PandasMoveDataFrame' | None: """ Uses the pandas's function drop_duplicates, to remove duplicated rows from data. 
@@ -1694,9 +1695,9 @@ def drop_duplicates( def shift( self, periods: int = 1, - freq: Optional[Union[DateOffset, Timedelta, Text]] = None, - axis: Union[int, Text] = 0, - fill_value: Optional[Any] = None + freq: DateOffset | Timedelta | str | None = None, + axis: int | str = 0, + fill_value: Any | None = None ) -> 'PandasMoveDataFrame': """ Shift index by desired number of periods with an optional time freq. @@ -1740,12 +1741,12 @@ def shift( def fillna( self, - value: Optional[Any] = None, - method: Optional[Text] = None, - axis: Optional[Union[int, Text]] = None, + value: Any | None = None, + method: str | None = None, + axis: int | str | None = None, inplace: bool = False, - limit: Optional[int] = None, - downcast: Optional[Dict] = None, + limit: int | None = None, + downcast: dict | None = None, ): """ Fill NA/NaN values using the specified method. @@ -1803,10 +1804,10 @@ def fillna( def dropna( self, - axis: Union[int, Text] = 0, - how: Text = 'any', - thresh: Optional[float] = None, - subset: Optional[List] = None, + axis: int | str = 0, + how: str = 'any', + thresh: float | None = None, + subset: list | None = None, inplace: bool = False ): """ @@ -1870,12 +1871,12 @@ def dropna( def sample( self, - n: Optional[int] = None, - frac: Optional[float] = None, + n: int | None = None, + frac: float | None = None, replace: bool = False, - weights: Optional[Union[Text, List]] = None, - random_state: Optional[int] = None, - axis: Optional[Union[int, Text]] = None + weights: str | list | None = None, + random_state: int | None = None, + axis: int | str | None = None ) -> 'PandasMoveDataFrame': """ Return a random sample of items from an axis of object. @@ -1938,7 +1939,7 @@ def sample( ) return PandasMoveDataFrame(data=_sample) - def isin(self, values: Union[List, Series, DataFrame, Dict]) -> DataFrame: + def isin(self, values: list | Series | DataFrame | dict) -> DataFrame: """ Determines whether each element in the DataFrame is contained in values. 
@@ -1965,7 +1966,7 @@ def isin(self, values: Union[List, Series, DataFrame, Dict]) -> DataFrame: def append( self, - other: Union['PandasMoveDataFrame', DataFrame], + other: 'PandasMoveDataFrame' | DataFrame, ignore_index: bool = False, verify_integrity: bool = False, sort: bool = False @@ -2010,11 +2011,11 @@ def append( def join( self, - other: Union['PandasMoveDataFrame', DataFrame], - on: Optional[Union[Text, List]] = None, - how: Text = 'left', - lsuffix: Text = '', - rsuffix: Text = '', + other: 'PandasMoveDataFrame' | DataFrame, + on: str | list | None = None, + how: str = 'left', + lsuffix: str = '', + rsuffix: str = '', sort: bool = False ) -> 'PandasMoveDataFrame': """ @@ -2080,18 +2081,18 @@ def join( def merge( self, - right: Union['PandasMoveDataFrame', DataFrame, Series], - how: Text = 'inner', - on: Optional[Union[Text, List]] = None, - left_on: Optional[Union[Text, List]] = None, - right_on: Optional[Union[Text, List]] = None, + right: 'PandasMoveDataFrame' | DataFrame | Series, + how: str = 'inner', + on: str | list | None = None, + left_on: str | list | None = None, + right_on: str | list | None = None, left_index: bool = False, right_index: bool = False, sort: bool = False, - suffixes: Tuple[Text, Text] = ('_x', '_y'), + suffixes: tuple[str, str] = ('_x', '_y'), copy: bool = True, - indicator: Union[bool, Text] = False, - validate: Optional[Text] = None + indicator: bool | str = False, + validate: str | None = None ) -> 'PandasMoveDataFrame': """ Merge DataFrame or named Series objects with a database-style join. @@ -2180,7 +2181,7 @@ def merge( ) return PandasMoveDataFrame(data=_merge) - def write_file(self, file_name: Text, separator: Text = ','): + def write_file(self, file_name: str, separator: str = ','): """ Write trajectory data to a new file. 
@@ -2197,8 +2198,8 @@ def write_file(self, file_name: Text, separator: Text = ','): ) def convert_to( - self, new_type: Text - ) -> Union[MoveDataFrame, 'PandasMoveDataFrame', 'DaskMoveDataFrame']: + self, new_type: str + ) -> MoveDataFrame | 'PandasMoveDataFrame' | 'DaskMoveDataFrame': """ Convert an object from one type to another specified by the user. @@ -2231,7 +2232,7 @@ def convert_to( self.last_operation = end_operation(operation) return self - def get_type(self) -> Text: + def get_type(self) -> str: """ Returns the type of the object. diff --git a/pymove/core/pandas_discrete.py b/pymove/core/pandas_discrete.py index 54e9c93e..04d65ab5 100644 --- a/pymove/core/pandas_discrete.py +++ b/pymove/core/pandas_discrete.py @@ -1,6 +1,5 @@ """PandasDiscreteMoveDataFrame class.""" - -from typing import Dict, List, Optional, Text, Union +from __future__ import annotations import numpy as np import pandas as pd @@ -37,12 +36,12 @@ class PandasDiscreteMoveDataFrame(PandasMoveDataFrame): def __init__( self, - data: Union[DataFrame, List, Dict], - latitude: Text = LATITUDE, - longitude: Text = LONGITUDE, - datetime: Text = DATETIME, - traj_id: Text = TRAJ_ID, - local_label: Text = LOCAL_LABEL + data: DataFrame | list | dict, + latitude: str = LATITUDE, + longitude: str = LONGITUDE, + datetime: str = DATETIME, + traj_id: str = TRAJ_ID, + local_label: str = LOCAL_LABEL ): """ Creates a dataframe using local_label as a discrete feature for localization. @@ -69,7 +68,7 @@ def __init__( ValueError, ParserError If the data types can't be converted. 
""" - super(PandasDiscreteMoveDataFrame, self).__init__( + super().__init__( data=data, latitude=latitude, longitude=longitude, @@ -79,7 +78,7 @@ def __init__( if local_label not in self: raise ValueError( - '{} column not in dataframe'.format(local_label) + f'{local_label} column not in dataframe' ) def discretize_based_grid(self, region_size: int = 1000): @@ -100,11 +99,11 @@ def discretize_based_grid(self, region_size: int = 1000): def generate_prev_local_features( self, - label_id: Text = TRAJ_ID, - local_label: Text = LOCAL_LABEL, + label_id: str = TRAJ_ID, + local_label: str = LOCAL_LABEL, sort: bool = True, inplace: bool = True - ) -> Optional['PandasDiscreteMoveDataFrame']: + ) -> 'PandasDiscreteMoveDataFrame' | None: """ Create a feature prev_local with the label of previous local to current point. @@ -145,7 +144,7 @@ def generate_prev_local_features( if (data_[local_label].dtype == 'int'): data_[local_label] = data_[local_label].astype(np.float16) for idx in progress_bar( - ids, desc='Generating previous {}'.format(local_label) + ids, desc=f'Generating previous {local_label}' ): current_local = data_.at[idx, local_label] current_local = np.array(current_local) @@ -168,15 +167,15 @@ def generate_prev_local_features( def generate_tid_based_statistics( self, - label_id: Text = TRAJ_ID, - local_label: Text = LOCAL_LABEL, + label_id: str = TRAJ_ID, + local_label: str = LOCAL_LABEL, mean_coef: float = 1.0, std_coef: float = 1.0, - statistics: Optional[DataFrame] = None, - label_tid_stat: Text = TID_STAT, + statistics: DataFrame | None = None, + label_tid_stat: str = TID_STAT, drop_single_points: bool = False, inplace: bool = True, - ) -> Optional['PandasDiscreteMoveDataFrame']: + ) -> 'PandasDiscreteMoveDataFrame' | None: """ Splits the trajectories into segments based on time statistics for segments. 
@@ -223,7 +222,7 @@ def generate_tid_based_statistics( self.generate_dist_time_speed_features(TRAJ_ID) if local_label not in data_: - raise KeyError('{} not in data frame.'.format(local_label)) + raise KeyError(f'{local_label} not in data frame.') if PREV_LOCAL not in data_: data_[local_label] = data_[local_label].astype(np.float64) @@ -234,7 +233,7 @@ def generate_tid_based_statistics( if statistics is None: if (data_[PREV_LOCAL].isna().sum() == data_.shape[0]): raise ValueError( - 'all values in the {} column are null.'.format(PREV_LOCAL) + f'all values in the {PREV_LOCAL} column are null.' ) else: statistics = generate_time_statistics(data_, local_label=local_label) @@ -269,7 +268,7 @@ def generate_tid_based_statistics( if label_id == TID_STAT: self.reset_index(drop=True, inplace=True) logger.debug( - '... {} = {}, then reseting and drop index!'.format(TID, TID_STAT)) + f'... {TID} = {TID_STAT}, then reseting and drop index!') else: self.reset_index(inplace=True) logger.debug('... reseting index\n') diff --git a/pymove/models/pattern_mining/clustering.py b/pymove/models/pattern_mining/clustering.py index d9302b3b..a874b79f 100644 --- a/pymove/models/pattern_mining/clustering.py +++ b/pymove/models/pattern_mining/clustering.py @@ -6,8 +6,9 @@ dbscan_clustering """ +from __future__ import annotations -from typing import Callable, Dict, Optional, Text, Union +from typing import Callable import numpy as np from pandas import DataFrame @@ -24,8 +25,8 @@ def elbow_method( k_initial: int = 1, max_clusters: int = 15, k_iteration: int = 1, - random_state: Optional[int] = None -) -> Dict: + random_state: int | None = None +) -> dict: """ Determines the optimal number of clusters. @@ -84,8 +85,8 @@ def gap_statistic( k_initial: int = 1, max_clusters: int = 15, k_iteration: int = 1, - random_state: Optional[int] = None -) -> Dict: + random_state: int | None = None +) -> dict: """ Calculates optimal clusters numbers using Gap Statistic. 
@@ -151,13 +152,13 @@ def gap_statistic( @timer_decorator def dbscan_clustering( move_data: DataFrame, - cluster_by: Text, + cluster_by: str, meters: int = 10, min_sample: float = 1680 / 2, earth_radius: float = EARTH_RADIUS, - metric: Union[Text, Callable] = 'euclidean', + metric: str | Callable = 'euclidean', inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Performs density based clustering on the move_dataframe according to cluster_by. diff --git a/pymove/preprocessing/compression.py b/pymove/preprocessing/compression.py index f79963ee..4bba2a38 100644 --- a/pymove/preprocessing/compression.py +++ b/pymove/preprocessing/compression.py @@ -4,8 +4,7 @@ compress_segment_stop_to_point """ - -from typing import Text +from __future__ import annotations import numpy as np from pandas import DataFrame @@ -28,11 +27,11 @@ @timer_decorator def compress_segment_stop_to_point( move_data: DataFrame, - label_segment: Text = SEGMENT_STOP, - label_stop: Text = STOP, - point_mean: Text = 'default', + label_segment: str = SEGMENT_STOP, + label_stop: str = STOP, + point_mean: str = 'default', drop_moves: bool = False, - label_id: Text = TRAJ_ID, + label_id: str = TRAJ_ID, dist_radius: float = 30, time_radius: float = 900, inplace: bool = False, @@ -147,7 +146,7 @@ def compress_segment_stop_to_point( lat_mean[ind_end] = move_data.loc[filter_][LATITUDE].mean() lon_mean[ind_end] = move_data.loc[filter_][LONGITUDE].mean() else: - logger.debug('There are segments with only one point: {}'.format(idx)) + logger.debug(f'There are segments with only one point: {idx}') move_data[LAT_MEAN] = lat_mean move_data[LON_MEAN] = lon_mean diff --git a/pymove/preprocessing/filters.py b/pymove/preprocessing/filters.py index 052881d4..871f17ad 100644 --- a/pymove/preprocessing/filters.py +++ b/pymove/preprocessing/filters.py @@ -17,8 +17,9 @@ clean_id_by_time_max """ +from __future__ import annotations -from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Text, 
Tuple, Union +from typing import TYPE_CHECKING, Any, Callable import numpy as np from pandas import DataFrame @@ -43,8 +44,8 @@ def get_bbox_by_radius( - coordinates: Tuple[float, float], radius: float = 1000 -) -> Tuple[float, float, float, float]: + coordinates: tuple[float, float], radius: float = 1000 +) -> tuple[float, float, float, float]: """ Defines minimum and maximum coordinates, given a distance radius from a point. @@ -82,10 +83,10 @@ def get_bbox_by_radius( def by_bbox( move_data: DataFrame, - bbox: Tuple[float, float, float, float], + bbox: tuple[float, float, float, float], filter_out: bool = False, inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Filters points of the trajectories according to specified bounding box. @@ -123,11 +124,11 @@ def by_bbox( def by_datetime( move_data: DataFrame, - start_datetime: Optional[Text] = None, - end_datetime: Optional[Text] = None, + start_datetime: str | None = None, + end_datetime: str | None = None, filter_out: bool = False, inplace: bool = False, -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Filters trajectories points according to specified time range. @@ -173,10 +174,10 @@ def by_datetime( def by_label( move_data: DataFrame, value: Any, - label_name: Text, + label_name: str, filter_out: bool = False, inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Filters trajectories points according to specified value and column label. @@ -212,11 +213,11 @@ def by_label( def by_id( move_data: DataFrame, - id_: Optional[int] = None, - label_id: Text = TRAJ_ID, + id_: int | None = None, + label_id: str = TRAJ_ID, filter_out: bool = False, inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Filters trajectories points according to specified trajectory id. 
@@ -250,10 +251,10 @@ def by_id( def by_tid( move_data: DataFrame, - tid_: Optional[Text] = None, + tid_: str | None = None, filter_out: bool = False, inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Filters trajectories points according to a specified trajectory tid. @@ -286,10 +287,10 @@ def by_tid( def clean_consecutive_duplicates( move_data: DataFrame, - subset: Optional[Union[int, Text]] = None, - keep: Union[Text, bool] = 'first', + subset: int | str | None = None, + keep: str | bool = 'first', inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Removes consecutive duplicate rows of the Dataframe. @@ -378,7 +379,7 @@ def _filter_speed_max_radius(move_data: DataFrame, **kwargs): return move_data[filter_] -def _filter_data(move_data: DataFrame, f: Callable, kwargs: Dict): +def _filter_data(move_data: DataFrame, f: Callable, kwargs: dict): """ Filter the dataframe using condition from given function. @@ -466,13 +467,13 @@ def _clean_gps(move_data: DataFrame, f: Callable, **kwargs): def clean_gps_jumps_by_distance( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Text = TRAJ_ID, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_id: str = TRAJ_ID, jump_coefficient: float = 3.0, threshold: float = 1, label_dtype: Callable = np.float64, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Removes the trajectories points that are outliers from the dataframe. 
@@ -524,12 +525,12 @@ def clean_gps_jumps_by_distance( def clean_gps_nearby_points_by_distances( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Text = TRAJ_ID, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_id: str = TRAJ_ID, radius_area: float = 10.0, label_dtype: Callable = np.float64, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Removes points from the trajectories with smaller distance from the point before. @@ -579,12 +580,12 @@ def clean_gps_nearby_points_by_distances( def clean_gps_nearby_points_by_speed( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Text = TRAJ_ID, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_id: str = TRAJ_ID, speed_radius: float = 0.0, label_dtype: Callable = np.float64, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Removes points from the trajectories with smaller speed of travel. @@ -634,12 +635,12 @@ def clean_gps_nearby_points_by_speed( def clean_gps_speed_max_radius( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Text = TRAJ_ID, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_id: str = TRAJ_ID, speed_max: float = 50.0, label_dtype: Callable = np.float64, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Removes trajectories points with higher speed. 
@@ -698,11 +699,11 @@ def clean_gps_speed_max_radius( def clean_trajectories_with_few_points( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_tid: Text = TID, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_tid: str = TID, min_points_per_trajectory: int = 2, inplace: bool = False -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Removes from the given dataframe, trajectories with fewer points. @@ -775,13 +776,13 @@ def clean_trajectories_with_few_points( def clean_trajectories_short_and_few_points( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Text = TID, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_id: str = TID, min_trajectory_distance: float = 100, min_points_per_trajectory: int = 2, label_dtype: Callable = np.float64, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Eliminates from the given dataframe trajectories with fewer points and shorter length. @@ -863,12 +864,12 @@ def clean_trajectories_short_and_few_points( def clean_id_by_time_max( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Text = TRAJ_ID, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_id: str = TRAJ_ID, time_max: float = 3600, label_dtype: Callable = np.float64, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Clears GPS points with time by ID greater than a user-defined limit. 
@@ -909,7 +910,7 @@ def clean_id_by_time_max( move_dataid_drop = ( move_data.groupby([label_id], as_index=False) .agg({TIME_TO_PREV: 'sum'}) - .query('%s < %s' % (TIME_TO_PREV, time_max)) + .query(f'{TIME_TO_PREV} < {time_max}') ) logger.debug( '...Ids total: %s\nIds to drop:%s' diff --git a/pymove/preprocessing/segmentation.py b/pymove/preprocessing/segmentation.py index 59c94f01..4dfcfe37 100644 --- a/pymove/preprocessing/segmentation.py +++ b/pymove/preprocessing/segmentation.py @@ -8,8 +8,9 @@ by_max_speed """ +from __future__ import annotations -from typing import TYPE_CHECKING, Optional, Text, Tuple, Union +from typing import TYPE_CHECKING import numpy as np import pandas as pd @@ -34,7 +35,7 @@ @timer_decorator -def bbox_split(bbox: Tuple[int, int, int, int], number_grids: int) -> DataFrame: +def bbox_split(bbox: tuple[int, int, int, int], number_grids: int) -> DataFrame: """ Splits the bounding box in N grids of the same size. @@ -60,7 +61,7 @@ def bbox_split(bbox: Tuple[int, int, int, int], number_grids: int) -> DataFrame: const_lat = abs(abs(lat_max) - abs(lat_min)) / number_grids const_lon = abs(abs(lon_max) - abs(lon_min)) / number_grids - logger.debug('const_lat: {}\nconst_lon: {}'.format(const_lat, const_lon)) + logger.debug(f'const_lat: {const_lat}\nconst_lon: {const_lon}') move_data = pd.DataFrame( columns=['lat_min', 'lon_min', 'lat_max', 'lon_max'] @@ -79,7 +80,7 @@ def bbox_split(bbox: Tuple[int, int, int, int], number_grids: int) -> DataFrame: return move_data -def _drop_single_point(move_data: DataFrame, label_new_tid: Text, label_id: Text): +def _drop_single_point(move_data: DataFrame, label_new_tid: str, label_id: str): """ Removes trajectory with single point. 
@@ -147,7 +148,7 @@ def _filter_and_dist_time_speed( def _filter_or_dist_time_speed( - move_data: DataFrame, idx: int, feature: Text, max_between_adj_points: float + move_data: DataFrame, idx: int, feature: str, max_between_adj_points: float ) -> ndarray: """ Filters the dataframe considering thresholds for time, dist and speed. @@ -172,7 +173,7 @@ def _filter_or_dist_time_speed( return np.nan_to_num(move_data.at[idx, feature]) > max_between_adj_points -def _prepare_segmentation(move_data: DataFrame, label_id: Text, label_new_tid: Text): +def _prepare_segmentation(move_data: DataFrame, label_id: str, label_new_tid: str): """ Resets the dataframe index, collects unique ids and initiates curr_id and count. @@ -196,7 +197,7 @@ def _prepare_segmentation(move_data: DataFrame, label_id: Text, label_new_tid: T """ if move_data.index.name is None: - logger.debug('...setting {} as index'.format(label_id)) + logger.debug(f'...setting {label_id} as index') move_data.set_index(label_id, inplace=True) curr_tid = 0 if label_new_tid not in move_data: @@ -209,8 +210,8 @@ def _prepare_segmentation(move_data: DataFrame, label_id: Text, label_new_tid: T def _update_curr_tid_count( filter_: ndarray, move_data: DataFrame, idx: int, - label_new_tid: Text, curr_tid: int, count: int -) -> Tuple[int, int]: + label_new_tid: str, curr_tid: int, count: int +) -> tuple[int, int]: """ Updates the tid. 
@@ -239,7 +240,7 @@ def _update_curr_tid_count( """ curr_tid += 1 if filter_.shape == (): - logger.debug('id: {} has no point to split'.format(idx)) + logger.debug(f'id: {idx} has no point to split') move_data.at[idx, label_new_tid] = curr_tid count += 1 else: @@ -255,7 +256,7 @@ def _update_curr_tid_count( def _filter_by( - move_data: DataFrame, label_id: Text, label_new_tid: Text, + move_data: DataFrame, label_id: str, label_new_tid: str, drop_single_points: bool, **kwargs ) -> DataFrame: """ @@ -333,15 +334,15 @@ def _filter_by( @timer_decorator def by_dist_time_speed( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Text = TRAJ_ID, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_id: str = TRAJ_ID, max_dist_between_adj_points: float = 3000, max_time_between_adj_points: float = 900, max_speed_between_adj_points: float = 50.0, drop_single_points: bool = True, - label_new_tid: Text = TID_PART, + label_new_tid: str = TID_PART, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Splits the trajectories into segments based on distance, time and speed. @@ -413,13 +414,13 @@ def by_dist_time_speed( @timer_decorator def by_max_dist( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Text = TRAJ_ID, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_id: str = TRAJ_ID, max_dist_between_adj_points: float = 3000, drop_single_points: bool = True, - label_new_tid: Text = TID_DIST, + label_new_tid: str = TID_DIST, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Segments the trajectories based on distance. 
@@ -480,13 +481,13 @@ def by_max_dist( @timer_decorator def by_max_time( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Text = TRAJ_ID, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_id: str = TRAJ_ID, max_time_between_adj_points: float = 900.0, drop_single_points: bool = True, - label_new_tid: Text = TID_TIME, + label_new_tid: str = TID_TIME, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Splits the trajectories into segments based on a maximum. @@ -548,13 +549,13 @@ def by_max_time( @timer_decorator def by_max_speed( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_id: Text = TRAJ_ID, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_id: str = TRAJ_ID, max_speed_between_adj_points: float = 50.0, drop_single_points: bool = True, - label_new_tid: Text = TID_SPEED, + label_new_tid: str = TID_SPEED, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Splits the trajectories into segments based on a maximum speed. 
diff --git a/pymove/preprocessing/stay_point_detection.py b/pymove/preprocessing/stay_point_detection.py index ba819f5f..a7631699 100644 --- a/pymove/preprocessing/stay_point_detection.py +++ b/pymove/preprocessing/stay_point_detection.py @@ -5,8 +5,9 @@ create_or_update_move_and_stop_by_radius """ +from __future__ import annotations -from typing import TYPE_CHECKING, Optional, Text, Union +from typing import TYPE_CHECKING import numpy as np @@ -29,13 +30,13 @@ @timer_decorator def create_or_update_move_stop_by_dist_time( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', dist_radius: float = 30, time_radius: float = 900, - label_id: Text = TRAJ_ID, - new_label: Text = SEGMENT_STOP, + label_id: str = TRAJ_ID, + new_label: str = SEGMENT_STOP, inplace: bool = False -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Determines the stops and moves points of the dataframe. @@ -97,7 +98,7 @@ def create_or_update_move_stop_by_dist_time( move_dataagg_tid = ( move_data.groupby(by=new_label) .agg({TIME_TO_PREV: 'sum'}) - .query('%s > %s' % (TIME_TO_PREV, time_radius)) + .query(f'{TIME_TO_PREV} > {time_radius}') .index ) idx = move_data[ @@ -112,12 +113,12 @@ def create_or_update_move_stop_by_dist_time( @timer_decorator def create_or_update_move_and_stop_by_radius( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', radius: float = 0, - target_label: Text = DIST_TO_PREV, - new_label: Text = SITUATION, + target_label: str = DIST_TO_PREV, + new_label: str = SITUATION, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Finds the stops and moves points of the dataframe. 
diff --git a/pymove/query/query.py b/pymove/query/query.py index 3ea17ee2..aa02915b 100644 --- a/pymove/query/query.py +++ b/pymove/query/query.py @@ -5,8 +5,7 @@ knn_query """ - -from typing import Text +from __future__ import annotations import numpy as np import pandas as pd @@ -20,12 +19,12 @@ def range_query( traj: DataFrame, move_df: DataFrame, - _id: Text = TRAJ_ID, + _id: str = TRAJ_ID, min_dist: float = 1000, - distance: Text = MEDP, - latitude: Text = LATITUDE, - longitude: Text = LONGITUDE, - datetime: Text = DATETIME + distance: str = MEDP, + latitude: str = LATITUDE, + longitude: str = LONGITUDE, + datetime: str = DATETIME ) -> DataFrame: """ Returns all trajectories that have a distance equal to or less than the trajectory. @@ -81,7 +80,7 @@ def dist_measure(traj, this, latitude, longitude, datetime): raise ValueError('Unknown distance measure. Use MEDP or MEDT') for traj_id in progress_bar( - move_df[_id].unique(), desc='Querying range by {}'.format(distance) + move_df[_id].unique(), desc=f'Querying range by {distance}' ): this = move_df.loc[move_df[_id] == traj_id] if dist_measure(traj, this, latitude, longitude, datetime) < min_dist: @@ -94,11 +93,11 @@ def knn_query( traj: DataFrame, move_df: DataFrame, k: int = 5, - id_: Text = TRAJ_ID, - distance: Text = MEDP, - latitude: Text = LATITUDE, - longitude: Text = LONGITUDE, - datetime: Text = DATETIME + id_: str = TRAJ_ID, + distance: str = MEDP, + latitude: str = LATITUDE, + longitude: str = LONGITUDE, + datetime: str = DATETIME ) -> DataFrame: """ Returns the k neighboring trajectories closest to the trajectory. @@ -154,7 +153,7 @@ def dist_measure(traj, this, latitude, longitude, datetime): raise ValueError('Unknown distance measure. 
Use MEDP or MEDT') for traj_id in progress_bar( - move_df[id_].unique(), desc='Querying knn by {}'.format(distance) + move_df[id_].unique(), desc=f'Querying knn by {distance}' ): if (traj_id != traj[id_].values[0]): this = move_df.loc[move_df[id_] == traj_id] diff --git a/pymove/semantic/semantic.py b/pymove/semantic/semantic.py index ab69f96d..5f7e5d65 100644 --- a/pymove/semantic/semantic.py +++ b/pymove/semantic/semantic.py @@ -12,8 +12,9 @@ filter_longer_time_to_stop_segment_by_id """ +from __future__ import annotations -from typing import TYPE_CHECKING, Optional, Text, Tuple, Union +from typing import TYPE_CHECKING import numpy as np from pandas import DataFrame @@ -42,8 +43,8 @@ def _end_create_operation( - move_data: DataFrame, new_label: Text, inplace: bool -) -> Optional[DataFrame]: + move_data: DataFrame, new_label: str, inplace: bool +) -> DataFrame | None: """ Returns the dataframe after create operation. @@ -69,8 +70,8 @@ def _end_create_operation( def _process_simple_filter( - move_data: DataFrame, new_label: Text, feature: Text, value: float, inplace: bool -) -> Optional[DataFrame]: + move_data: DataFrame, new_label: str, feature: str, value: float, inplace: bool +) -> DataFrame | None: """ Processes create operation with simple filter. @@ -108,12 +109,12 @@ def _process_simple_filter( @timer_decorator def outliers( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', jump_coefficient: float = 3.0, threshold: float = 1, - new_label: Text = OUTLIER, + new_label: str = OUTLIER, inplace: bool = False -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Create or update a boolean feature to detect outliers. 
@@ -173,10 +174,10 @@ def outliers( @timer_decorator def create_or_update_out_of_the_bbox( move_data: DataFrame, - bbox: Tuple[int, int, int, int], - new_label: Text = OUT_BBOX, + bbox: tuple[int, int, int, int], + new_label: str = OUT_BBOX, inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Create or update a boolean feature to detect points out of the bbox. @@ -229,11 +230,11 @@ def create_or_update_out_of_the_bbox( @timer_decorator def create_or_update_gps_deactivated_signal( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', max_time_between_adj_points: float = 7200, - new_label: Text = DEACTIVATED, + new_label: str = DEACTIVATED, inplace: bool = False -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Creates a new feature that inform if point invalid. @@ -279,11 +280,11 @@ def create_or_update_gps_deactivated_signal( @timer_decorator def create_or_update_gps_jump( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', max_dist_between_adj_points: float = 3000, - new_label: Text = JUMP, + new_label: str = JUMP, inplace: bool = False -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Creates a new feature that inform if point is a gps jump. 
@@ -328,15 +329,15 @@ def create_or_update_gps_jump( @timer_decorator def create_or_update_short_trajectory( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', max_dist_between_adj_points: float = 3000, max_time_between_adj_points: float = 7200, max_speed_between_adj_points: float = 50, k_segment_max: int = 50, - label_tid: Text = TID_PART, - new_label: Text = SHORT, + label_tid: str = TID_PART, + new_label: str = SHORT, inplace: bool = False -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Creates a new feature that inform if point belongs to a short trajectory. @@ -397,12 +398,12 @@ def create_or_update_short_trajectory( @timer_decorator def create_or_update_gps_block_signal( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', max_time_stop: float = 7200, - new_label: Text = BLOCK, - label_tid: Text = TID_PART, + new_label: str = BLOCK, + label_tid: str = TID_PART, inplace: bool = False -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Creates a new feature that inform segments with periods without moving. @@ -460,13 +461,13 @@ def create_or_update_gps_block_signal( @timer_decorator def filter_block_signal_by_repeated_amount_of_points( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', amount_max_of_points_stop: float = 30.0, max_time_stop: float = 7200, filter_out: bool = False, - label_tid: Text = TID_PART, + label_tid: str = TID_PART, inplace: bool = False -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Filters from dataframe points with blocked signal by amount of points. 
@@ -521,12 +522,12 @@ def filter_block_signal_by_repeated_amount_of_points( @timer_decorator def filter_block_signal_by_time( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', max_time_stop: float = 7200, filter_out: bool = False, - label_tid: Text = TID_PART, + label_tid: str = TID_PART, inplace: bool = False -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Filters from dataframe points with blocked signal by time. @@ -582,14 +583,14 @@ def filter_block_signal_by_time( @timer_decorator def filter_longer_time_to_stop_segment_by_id( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', dist_radius: float = 30, time_radius: float = 900, - label_id: Text = TRAJ_ID, - label_segment_stop: Text = SEGMENT_STOP, + label_id: str = TRAJ_ID, + label_segment_stop: str = SEGMENT_STOP, filter_out: bool = False, inplace: bool = False -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Filters from dataframe segment with longest stop time. 
diff --git a/pymove/tests/test_visualization_matplotlib.py b/pymove/tests/test_visualization_matplotlib.py index 8b4e49f6..ddf0644e 100644 --- a/pymove/tests/test_visualization_matplotlib.py +++ b/pymove/tests/test_visualization_matplotlib.py @@ -67,7 +67,7 @@ def test_show_object_id_by_date(tmpdir): def test_plot_trajectory_by_id(tmpdir): move_df = _default_move_df() - move_df[TID] = ['1', '1', '2', '2', '2'] + move_df[TRAJ_ID] = ['1', '1', '2', '2', '2'] d = tmpdir.mkdir('visualization') diff --git a/pymove/utils/conversions.py b/pymove/utils/conversions.py index 9cb45b84..fd39efbd 100644 --- a/pymove/utils/conversions.py +++ b/pymove/utils/conversions.py @@ -24,10 +24,10 @@ hours_to_seconds """ - +from __future__ import annotations import math -from typing import TYPE_CHECKING, List, Optional, Text, Union +from typing import TYPE_CHECKING import numpy as np from numpy import ndarray @@ -109,7 +109,7 @@ def meters_to_eps( return radius_meters / earth_radius -def list_to_str(input_list: List, delimiter: Text = ',') -> Text: +def list_to_str(input_list: list, delimiter: str = ',') -> str: """ Concatenates a list elements, joining them by the separator `delimiter`. @@ -138,7 +138,7 @@ def list_to_str(input_list: List, delimiter: Text = ',') -> Text: ) -def list_to_csv_str(input_list: List) -> Text: +def list_to_csv_str(input_list: list) -> str: """ Concatenates the elements of the list, joining them by ",". @@ -163,7 +163,7 @@ def list_to_csv_str(input_list: List) -> Text: return list_to_str(input_list) -def list_to_svm_line(original_list: List) -> Text: +def list_to_svm_line(original_list: list) -> str: """ Concatenates list elements in consecutive element pairs. 
@@ -188,11 +188,11 @@ def list_to_svm_line(original_list: List) -> Text: list_size = len(original_list) svm_line = '%s ' % original_list[0] for i in range(1, list_size): - svm_line += '%s:%s ' % (i, original_list[i]) + svm_line += f'{i}:{original_list[i]} ' return svm_line.rstrip() -def lon_to_x_spherical(lon: Union[float, ndarray]) -> Union[float, ndarray]: +def lon_to_x_spherical(lon: float | ndarray) -> float | ndarray: """ Convert longitude to X EPSG:3857 WGS 84/Pseudo-Mercator. @@ -222,7 +222,7 @@ def lon_to_x_spherical(lon: Union[float, ndarray]) -> Union[float, ndarray]: return 6378137 * np.radians(lon) -def lat_to_y_spherical(lat: Union[float, ndarray]) -> Union[float, ndarray]: +def lat_to_y_spherical(lat: float | ndarray) -> float | ndarray: """ Convert latitude to Y EPSG:3857 WGS 84/Pseudo-Mercator. @@ -252,7 +252,7 @@ def lat_to_y_spherical(lat: Union[float, ndarray]) -> Union[float, ndarray]: return 6378137 * np.log(np.tan(np.pi / 4 + np.radians(lat) / 2.0)) -def x_to_lon_spherical(x: Union[float, ndarray]) -> Union[float, ndarray]: +def x_to_lon_spherical(x: float | ndarray) -> float | ndarray: """ Convert X EPSG:3857 WGS 84 / Pseudo-Mercator to longitude. @@ -281,7 +281,7 @@ def x_to_lon_spherical(x: Union[float, ndarray]) -> Union[float, ndarray]: return np.degrees(x / 6378137.0) -def y_to_lat_spherical(y: Union[float, ndarray]) -> Union[float, ndarray]: +def y_to_lat_spherical(y: float | ndarray) -> float | ndarray: """ Convert Y EPSG:3857 WGS 84 / Pseudo-Mercator to latitude. 
@@ -312,7 +312,7 @@ def y_to_lat_spherical(y: Union[float, ndarray]) -> Union[float, ndarray]: def geometry_points_to_lat_and_lon( move_data: DataFrame, - geometry_label: Text = GEOMETRY, + geometry_label: str = GEOMETRY, drop_geometry: bool = False, inplace: bool = False ) -> DataFrame: @@ -369,8 +369,8 @@ def geometry_points_to_lat_and_lon( def lat_and_lon_decimal_degrees_to_decimal( move_data: DataFrame, - latitude: Text = LATITUDE, - longitude: Text = LONGITUDE + latitude: str = LATITUDE, + longitude: str = LONGITUDE ) -> DataFrame: """ Converts latitude and longitude format from decimal degrees to decimal format. @@ -419,11 +419,11 @@ def _decimal_degree_to_decimal(row): def ms_to_kmh( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_speed: Text = SPEED_TO_PREV, - new_label: Text = None, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_speed: str = SPEED_TO_PREV, + new_label: str = None, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Convert values, in ms, in label_speed column to kmh. @@ -496,11 +496,11 @@ def ms_to_kmh( def kmh_to_ms( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_speed: Text = SPEED_TO_PREV, - new_label: Optional[Text] = None, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_speed: str = SPEED_TO_PREV, + new_label: str | None = None, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Convert values, in kmh, in label_speed column to ms. 
@@ -567,11 +567,11 @@ def kmh_to_ms( def meters_to_kilometers( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_distance: Text = DIST_TO_PREV, - new_label: Optional[Text] = None, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_distance: str = DIST_TO_PREV, + new_label: str | None = None, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Convert values, in meters, in label_distance column to kilometers. @@ -637,11 +637,11 @@ def meters_to_kilometers( def kilometers_to_meters( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_distance: Text = DIST_TO_PREV, - new_label: Optional[Text] = None, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_distance: str = DIST_TO_PREV, + new_label: str | None = None, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Convert values, in kilometers, in label_distance column to meters. @@ -708,11 +708,11 @@ def kilometers_to_meters( def seconds_to_minutes( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_time: Text = TIME_TO_PREV, - new_label: Optional[Text] = None, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_time: str = TIME_TO_PREV, + new_label: str | None = None, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Convert values, in seconds, in label_distance column to minutes. 
@@ -778,11 +778,11 @@ def seconds_to_minutes( def minute_to_seconds( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_time: Text = TIME_TO_PREV, - new_label: Optional[Text] = None, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_time: str = TIME_TO_PREV, + new_label: str | None = None, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Convert values, in minutes, in label_distance column to seconds. @@ -849,11 +849,11 @@ def minute_to_seconds( def minute_to_hours( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_time: Text = TIME_TO_PREV, - new_label: Optional[Text] = None, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_time: str = TIME_TO_PREV, + new_label: str | None = None, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Convert values, in minutes, in label_distance column to hours. @@ -921,11 +921,11 @@ def minute_to_hours( def hours_to_minute( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_time: Text = TIME_TO_PREV, - new_label: Optional[Text] = None, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_time: str = TIME_TO_PREV, + new_label: str | None = None, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Convert values, in hours, in label_distance column to minute. 
@@ -992,11 +992,11 @@ def hours_to_minute( def seconds_to_hours( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_time: Text = TIME_TO_PREV, - new_label: Optional[Text] = None, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_time: str = TIME_TO_PREV, + new_label: str | None = None, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Convert values, in seconds, in label_distance column to hours. @@ -1063,11 +1063,11 @@ def seconds_to_hours( def hours_to_seconds( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - label_time: Text = TIME_TO_PREV, - new_label: Optional[Text] = None, + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + label_time: str = TIME_TO_PREV, + new_label: str | None = None, inplace: bool = False, -) -> Optional[Union['PandasMoveDataFrame', 'DaskMoveDataFrame']]: +) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None: """ Convert values, in hours, in label_distance column to seconds. diff --git a/pymove/utils/data_augmentation.py b/pymove/utils/data_augmentation.py index d5f2e1b5..1bbb2771 100644 --- a/pymove/utils/data_augmentation.py +++ b/pymove/utils/data_augmentation.py @@ -11,8 +11,9 @@ instance_crossover_augmentation """ +from __future__ import annotations -from typing import TYPE_CHECKING, Dict, List, Optional, Text, Tuple, Union +from typing import TYPE_CHECKING import numpy as np import pandas as pd @@ -29,8 +30,8 @@ def append_row( data: DataFrame, - row: Optional[Series] = None, - columns: Optional[Dict] = None + row: Series | None = None, + columns: dict | None = None ): """ Insert a new line in the dataframe with the information passed by parameter. 
@@ -56,7 +57,7 @@ def append_row( def generate_trajectories_df( - data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'] + data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame' ) -> DataFrame: """ Generates a dataframe with the sequence of location points of a trajectory. @@ -105,7 +106,7 @@ def generate_trajectories_df( def generate_start_feature( - data: DataFrame, label_trajectory: Text = TRAJECTORY + data: DataFrame, label_trajectory: str = TRAJECTORY ): """ Removes the last point from the trajectory and adds it in a new column 'destiny'. @@ -125,7 +126,7 @@ def generate_start_feature( def generate_destiny_feature( - data: DataFrame, label_trajectory: Text = TRAJECTORY + data: DataFrame, label_trajectory: str = TRAJECTORY ): """ Removes the first point from the trajectory and adds it in a new column 'start'. @@ -145,8 +146,8 @@ def generate_destiny_feature( def split_crossover( - sequence_a: List, sequence_b: List, frac: float = 0.5 -) -> Tuple[List, List]: + sequence_a: list, sequence_b: list, frac: float = 0.5 +) -> tuple[list, list]: """ Divides two arrays in the indicated ratio and exchange their halves. 
@@ -239,9 +240,9 @@ def _augmentation(data: DataFrame, aug_df: DataFrame, frac: float = 0.5): def augmentation_trajectories_df( - data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - restriction: Text = 'destination only', - label_trajectory: Text = TRAJECTORY, + data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + restriction: str = 'destination only', + label_trajectory: str = TRAJECTORY, insert_at_df: bool = False, frac: float = 0.5, ) -> DataFrame: @@ -336,8 +337,8 @@ def insert_points_in_df(data: DataFrame, aug_df: DataFrame): def instance_crossover_augmentation( data: DataFrame, - restriction: Text = 'destination only', - label_trajectory: Text = TRAJECTORY, + restriction: str = 'destination only', + label_trajectory: str = TRAJECTORY, frac: float = 0.5 ): """ diff --git a/pymove/utils/datetime.py b/pymove/utils/datetime.py index 7bc6a61a..9ea32068 100644 --- a/pymove/utils/datetime.py +++ b/pymove/utils/datetime.py @@ -21,9 +21,9 @@ threshold_time_statistics """ +from __future__ import annotations from datetime import datetime -from typing import Optional, Text, Union import holidays from pandas import DataFrame, Timestamp @@ -44,7 +44,7 @@ ) -def date_to_str(dt: datetime) -> Text: +def date_to_str(dt: datetime) -> str: """ Get date, in string format, from timestamp. @@ -73,7 +73,7 @@ def date_to_str(dt: datetime) -> Text: return dt.strftime('%Y-%m-%d') -def str_to_datetime(dt_str: Text) -> datetime: +def str_to_datetime(dt_str: str) -> datetime: """ Converts a datetime in string format to datetime format. @@ -107,7 +107,7 @@ def str_to_datetime(dt_str: Text) -> datetime: return datetime.strptime(dt_str, '%Y-%m-%d %H:%M:%S') -def datetime_to_str(dt: datetime) -> Text: +def datetime_to_str(dt: datetime) -> str: """ Converts a date in datetime format to string format. 
@@ -221,9 +221,9 @@ def to_day_of_week_int(dt: datetime) -> int: def working_day( - dt: Union[Text, datetime], - country: Text = 'BR', - state: Optional[Text] = None + dt: str | datetime, + country: str = 'BR', + state: str | None = None ) -> bool: """ Indices if a day specified by the user is a working day. @@ -280,7 +280,7 @@ def working_day( return result -def now_str() -> Text: +def now_str() -> str: """ Get datetime of now. @@ -298,7 +298,7 @@ def now_str() -> Text: return datetime_to_str(datetime.now()) -def deltatime_str(deltatime_seconds: float) -> Text: +def deltatime_str(deltatime_seconds: float) -> str: """ Convert time in a format appropriate of time. @@ -327,14 +327,14 @@ def deltatime_str(deltatime_seconds: float) -> Text: hours, rem = divmod(deltatime_seconds, 3600) minutes, seconds = divmod(rem, 60) if hours: - return '{:0>2}h:{:0>2}m:{:05.2f}s'.format(int(hours), int(minutes), seconds) + return f'{int(hours):0>2}h:{int(minutes):0>2}m:{seconds:05.2f}s' elif minutes: - return '{:0>2}m:{:05.2f}s'.format(int(minutes), seconds) + return f'{int(minutes):0>2}m:{seconds:05.2f}s' else: - return '{:05.2f}s'.format(seconds) + return f'{seconds:05.2f}s' -def timestamp_to_millis(timestamp: Text) -> int: +def timestamp_to_millis(timestamp: str) -> int: """ Converts a local datetime to a POSIX timestamp in milliseconds (like in Java). @@ -380,7 +380,7 @@ def millis_to_timestamp(milliseconds: float) -> Timestamp: return Timestamp(milliseconds, unit='ms') -def time_to_str(time: Timestamp) -> Text: +def time_to_str(time: Timestamp) -> str: """ Get time, in string format, from timestamp. @@ -403,7 +403,7 @@ def time_to_str(time: Timestamp) -> Text: return time.strftime('%H:%M:%S') -def str_to_time(dt_str: Text) -> datetime: +def str_to_time(dt_str: str) -> datetime: """ Converts a time in string format "%H:%M:%S" to datetime format. 
@@ -491,10 +491,10 @@ def create_time_slot_in_minute( data: DataFrame, slot_interval: int = 15, initial_slot: int = 0, - label_datetime: Text = DATETIME, - label_time_slot: Text = TIME_SLOT, + label_datetime: str = DATETIME, + label_time_slot: str = TIME_SLOT, inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Partitions the time in slot windows. @@ -537,7 +537,7 @@ def create_time_slot_in_minute( 3 39.984224 116.319402 2008-10-23 06:10:15 1 24 """ if data.dtypes[label_datetime] != 'datetime64[ns]': - raise ValueError('{} colum must be of type datetime'.format(label_datetime)) + raise ValueError(f'{label_datetime} colum must be of type datetime') if not inplace: data = data.copy() minute_day = data[label_datetime].dt.hour * 60 + data[label_datetime].dt.minute @@ -548,7 +548,7 @@ def create_time_slot_in_minute( def generate_time_statistics( data: DataFrame, - local_label: Text = LOCAL_LABEL + local_label: str = LOCAL_LABEL ): """ Calculates time statistics of the pairwise local labels. @@ -631,7 +631,7 @@ def _calc_time_threshold(seg_mean: float, seg_std: float) -> float: 0.0 """ threshold = seg_std + seg_mean - threshold = float('{:.1f}'.format(threshold)) + threshold = float(f'{threshold:.1f}') return threshold @@ -640,7 +640,7 @@ def threshold_time_statistics( mean_coef: float = 1.0, std_coef: float = 1.0, inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Calculates and creates the threshold column. 
diff --git a/pymove/utils/distances.py b/pymove/utils/distances.py index ce27dddc..5c81711f 100644 --- a/pymove/utils/distances.py +++ b/pymove/utils/distances.py @@ -8,7 +8,7 @@ medt """ -from typing import Text, Union +from __future__ import annotations import numpy as np import pandas as pd @@ -21,13 +21,13 @@ def haversine( - lat1: Union[float, ndarray], - lon1: Union[float, ndarray], - lat2: Union[float, ndarray], - lon2: Union[float, ndarray], + lat1: float | ndarray, + lon1: float | ndarray, + lat2: float | ndarray, + lon2: float | ndarray, to_radians: bool = True, earth_radius: float = EARTH_RADIUS -) -> Union[float, ndarray]: +) -> float | ndarray: """ Calculates the great circle distance between two points on the earth. @@ -83,11 +83,11 @@ def haversine( def euclidean_distance_in_meters( - lat1: Union[float, ndarray], - lon1: Union[float, ndarray], - lat2: Union[float, ndarray], - lon2: Union[float, ndarray] -) -> Union[float, ndarray]: + lat1: float | ndarray, + lon1: float | ndarray, + lat2: float | ndarray, + lon2: float | ndarray +) -> float | ndarray: """ Calculate the euclidean distance in meters between two points. @@ -130,8 +130,8 @@ def euclidean_distance_in_meters( def nearest_points( traj1: DataFrame, traj2: DataFrame, - latitude: Text = LATITUDE, - longitude: Text = LONGITUDE, + latitude: str = LATITUDE, + longitude: str = LONGITUDE, ) -> DataFrame: """ Returns the point closest to another trajectory based on the Euclidean distance. @@ -191,8 +191,8 @@ def nearest_points( def medp( traj1: DataFrame, traj2: DataFrame, - latitude: Text = LATITUDE, - longitude: Text = LONGITUDE + latitude: str = LATITUDE, + longitude: str = LONGITUDE ) -> float: """ Returns the Mean Euclidian Distance Predictive between two trajectories. 
@@ -243,9 +243,9 @@ def medp( def medt( traj1: DataFrame, traj2: DataFrame, - latitude: Text = LATITUDE, - longitude: Text = LONGITUDE, - datetime: Text = DATETIME + latitude: str = LATITUDE, + longitude: str = LONGITUDE, + datetime: str = DATETIME ) -> float: """ Returns the Mean Euclidian Distance Trajectory between two trajectories. diff --git a/pymove/utils/geoutils.py b/pymove/utils/geoutils.py index 6137e6fd..c7c5086b 100644 --- a/pymove/utils/geoutils.py +++ b/pymove/utils/geoutils.py @@ -7,8 +7,7 @@ decode_geohash_to_latlon, """ - -from typing import Text, Tuple +from __future__ import annotations import geohash2 as gh import numpy as np @@ -30,15 +29,13 @@ BINARY = [ np.asarray( - list('{0:05b}'.format(x)), dtype=int + list(f'{x:05b}'), dtype=int ) for x in range(0, len(BASE_32)) ] - - BASE_32_TO_BIN = dict(zip(BASE_32, BINARY)) -def v_color(ob: BaseGeometry) -> Text: +def v_color(ob: BaseGeometry) -> str: """ Returns '#ffcc33' if object crosses otherwise it returns '#6699cc'. @@ -69,7 +66,7 @@ def v_color(ob: BaseGeometry) -> Text: return COLORS[ob.is_simple + 33] -def _encode(lat: float, lon: float, precision: float = 15) -> Text: +def _encode(lat: float, lon: float, precision: float = 15) -> str: """ Encodes latitude/longitude to geohash. @@ -102,7 +99,7 @@ def _encode(lat: float, lon: float, precision: float = 15) -> Text: return gh.encode(lat, lon, precision) -def _decode(geohash: Text) -> Tuple[float, float]: +def _decode(geohash: str) -> tuple[float, float]: """ Decode geohash to latitude/longitude. @@ -169,7 +166,7 @@ def _bin_geohash(lat: float, lon: float, precision: float = 15) -> ndarray: def _reset_and_create_arrays_none( data: DataFrame, reset_index: bool = True -) -> Tuple[ndarray, ndarray, ndarray, ndarray]: +) -> tuple[ndarray, ndarray, ndarray, ndarray]: """ Reset the df index and create arrays of none values. 
@@ -310,7 +307,7 @@ def create_bin_geohash_df(data: DataFrame, precision: float = 15): def decode_geohash_to_latlon( data: DataFrame, - label_geohash: Text = GEOHASH, + label_geohash: str = GEOHASH, reset_index: bool = True ): """ @@ -350,7 +347,7 @@ def decode_geohash_to_latlon( 4 39.984217 116.319422 wx4eqyvhyyr2yy8 39.984217 116.319422 """ if label_geohash not in data: - raise ValueError('feature {} not in df'.format(label_geohash)) + raise ValueError(f'feature {label_geohash} not in df') lat, lon, _, _ = _reset_and_create_arrays_none(data, reset_index=reset_index) diff --git a/pymove/utils/integration.py b/pymove/utils/integration.py index 0cedb7ff..44052cef 100644 --- a/pymove/utils/integration.py +++ b/pymove/utils/integration.py @@ -15,9 +15,9 @@ merge_home_with_poi """ +from __future__ import annotations from collections import namedtuple -from typing import List, Optional, Text, Tuple import numpy as np from numpy import ndarray @@ -50,10 +50,10 @@ def union_poi_bank( data: DataFrame, - label_poi: Text = TYPE_POI, - banks: Optional[List[Text]] = None, + label_poi: str = TYPE_POI, + banks: list[str] | None = None, inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Performs the union between the different bank categories. @@ -114,7 +114,7 @@ def union_poi_bank( if not inplace: data = data.copy() logger.debug('union bank categories to one category') - logger.debug('... There are {} -- {}'.format(data[label_poi].nunique(), label_poi)) + logger.debug(f'... There are {data[label_poi].nunique()} -- {label_poi}') if banks is None: banks = [ 'bancos_filiais', @@ -131,10 +131,10 @@ def union_poi_bank( def union_poi_bus_station( data: DataFrame, - label_poi: Text = TYPE_POI, - bus_stations: Optional[List[Text]] = None, + label_poi: str = TYPE_POI, + bus_stations: list[str] | None = None, inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Performs the union between the different bus station categories. 
@@ -203,10 +203,10 @@ def union_poi_bus_station( def union_poi_bar_restaurant( data: DataFrame, - label_poi: Text = TYPE_POI, - bar_restaurant: Optional[List[Text]] = None, + label_poi: str = TYPE_POI, + bar_restaurant: list[str] | None = None, inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Performs the union between bar and restaurant categories. @@ -270,10 +270,10 @@ def union_poi_bar_restaurant( def union_poi_parks( data: DataFrame, - label_poi: Text = TYPE_POI, - parks: Optional[List[Text]] = None, + label_poi: str = TYPE_POI, + parks: list[str] | None = None, inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Performs the union between park categories. @@ -337,10 +337,10 @@ def union_poi_parks( def union_poi_police( data: DataFrame, - label_poi: Text = TYPE_POI, - police: Optional[List[Text]] = None, + label_poi: str = TYPE_POI, + police: list[str] | None = None, inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Performs the union between police categories. @@ -405,9 +405,9 @@ def union_poi_police( def join_collective_areas( data: DataFrame, areas: DataFrame, - label_geometry: Text = GEOMETRY, + label_geometry: str = GEOMETRY, inplace: bool = False -) -> Optional[DataFrame]: +) -> DataFrame | None: """ Performs the integration between trajectories and collective areas. @@ -487,7 +487,7 @@ def _reset_and_creates_id_and_lat_lon( df_pois: DataFrame, lat_lon_poi: bool = True, reset_index: bool = True -) -> Tuple[ndarray, ndarray, ndarray, ndarray, ndarray]: +) -> tuple[ndarray, ndarray, ndarray, ndarray, ndarray]: """ Resets the indexes of the dataframes. 
@@ -555,8 +555,8 @@ def _reset_and_creates_id_and_lat_lon( def _reset_set_window__and_creates_event_id_type( - data: DataFrame, df_events: DataFrame, time_window: float, label_date: Text = DATETIME -) -> Tuple[Series, Series, ndarray, ndarray, ndarray]: + data: DataFrame, df_events: DataFrame, time_window: float, label_date: str = DATETIME +) -> tuple[Series, Series, ndarray, ndarray, ndarray]: """ Resets the indexes of the dataframes. @@ -619,8 +619,8 @@ def _reset_set_window__and_creates_event_id_type( def _reset_set_window_and_creates_event_id_type_all( - data: DataFrame, df_events: DataFrame, time_window: float, label_date: Text = DATETIME -) -> Tuple[Series, Series, ndarray, ndarray, ndarray]: + data: DataFrame, df_events: DataFrame, time_window: float, label_date: str = DATETIME +) -> tuple[Series, Series, ndarray, ndarray, ndarray]: """ Resets the indexes of the dataframes. @@ -685,8 +685,8 @@ def _reset_set_window_and_creates_event_id_type_all( def join_with_pois( data: DataFrame, df_pois: DataFrame, - label_id: Text = TRAJ_ID, - label_poi_name: Text = NAME_POI, + label_id: str = TRAJ_ID, + label_poi_name: str = NAME_POI, reset_index: bool = True, inplace: bool = False ): @@ -806,8 +806,8 @@ def join_with_pois( def join_with_pois_by_category( data: DataFrame, df_pois: DataFrame, - label_category: Text = TYPE_POI, - label_id: Text = TRAJ_ID, + label_category: str = TYPE_POI, + label_id: str = TRAJ_ID, inplace: bool = False ): """ @@ -888,7 +888,7 @@ def join_with_pois_by_category( df_category = df_pois[df_pois[label_category] == c] df_category.reset_index(drop=True, inplace=True) - desc = 'computing dist to {} category ({}/{})'.format(c, i, size_categories) + desc = f'computing dist to {c} category ({i}/{size_categories})' for idx, row in progress_bar(data.iterrows(), total=len(data), desc=desc): lat_user = np.full( df_category.shape[0], row[LATITUDE], dtype=np.float64 @@ -923,10 +923,10 @@ def join_with_pois_by_category( def join_with_events( data: 
DataFrame, df_events: DataFrame, - label_date: Text = DATETIME, + label_date: str = DATETIME, time_window: int = 900, - label_event_id: Text = EVENT_ID, - label_event_type: Text = EVENT_TYPE, + label_event_id: str = EVENT_ID, + label_event_type: str = EVENT_TYPE, inplace: bool = False ): """ @@ -1070,9 +1070,9 @@ def join_with_events( def join_with_event_by_dist_and_time( data: DataFrame, df_events: DataFrame, - label_date: Text = DATETIME, - label_event_id: Text = EVENT_ID, - label_event_type: Text = EVENT_TYPE, + label_date: str = DATETIME, + label_event_id: str = EVENT_ID, + label_event_type: str = EVENT_TYPE, time_window: float = 3600, radius: float = 1000, inplace: bool = False @@ -1219,9 +1219,9 @@ def join_with_event_by_dist_and_time( def join_with_home_by_id( data: DataFrame, df_home: DataFrame, - label_id: Text = TRAJ_ID, - label_address: Text = ADDRESS, - label_city: Text = CITY, + label_id: str = TRAJ_ID, + label_address: str = ADDRESS, + label_city: str = CITY, drop_id_without_home: bool = False, inplace: bool = False ): @@ -1288,7 +1288,7 @@ def join_with_home_by_id( ids_without_home = [] if data.index.name is None: - logger.debug('...setting {} as index'.format(label_id)) + logger.debug(f'...setting {label_id} as index') data.set_index(label_id, inplace=True) for idx in progress_bar( @@ -1297,7 +1297,7 @@ def join_with_home_by_id( filter_home = df_home[label_id] == idx if df_home[filter_home].shape[0] == 0: - logger.debug('...id: {} has not HOME'.format(idx)) + logger.debug(f'...id: {idx} has not HOME') ids_without_home.append(idx) else: home = df_home[filter_home].iloc[0] @@ -1338,11 +1338,11 @@ def join_with_home_by_id( def merge_home_with_poi( data: DataFrame, - label_dist_poi: Text = DIST_POI, - label_name_poi: Text = NAME_POI, - label_id_poi: Text = ID_POI, - label_home: Text = HOME, - label_dist_home: Text = DIST_HOME, + label_dist_poi: str = DIST_POI, + label_name_poi: str = NAME_POI, + label_id_poi: str = ID_POI, + label_home: str = HOME, + 
label_dist_home: str = DIST_HOME, drop_columns: bool = True, inplace: bool = False ): diff --git a/pymove/utils/log.py b/pymove/utils/log.py index c8fed867..2603594c 100644 --- a/pymove/utils/log.py +++ b/pymove/utils/log.py @@ -6,12 +6,13 @@ timer_decorator """ +from __future__ import annotations import logging import os import time from functools import wraps -from typing import Callable, Iterable, Optional, Text +from typing import Callable, Iterable from IPython import get_ipython from IPython.display import display @@ -42,7 +43,7 @@ def wrapper(*args, **kwargs): t_start = time.time() result = func(*args, **kwargs) t_total = deltatime_str(time.time() - t_start) - message = '%s took %s' % (func.__name__, t_total) + message = f'{func.__name__} took {t_total}' logger.debug('{}\n{}\n{}'.format('*' * len(message), message, '*' * len(message))) return result @@ -51,9 +52,9 @@ def wrapper(*args, **kwargs): def _log_progress( sequence: Iterable, - desc: Optional[Text] = None, - total: Optional[int] = None, - miniters: Optional[int] = None + desc: str | None = None, + total: int | None = None, + miniters: int | None = None ): """ Make and display a progress bar. @@ -102,10 +103,10 @@ def _log_progress( for index, record in enumerate(sequence, 1): if index == 1 or index % miniters == 0: if is_iterator: - label.value = '%s: %s / ?' % (desc, index) + label.value = f'{desc}: {index} / ?' 
else: progress.value = index - label.value = u'%s: %s / %s' % (desc, index, total) + label.value = f'{desc}: {index} / {total}' yield record except Exception: progress.bar_style = 'danger' @@ -113,7 +114,7 @@ def _log_progress( else: progress.bar_style = 'success' progress.value = index - label.value = '%s: %s' % (desc, str(index or '?')) + label.value = '{}: {}'.format(desc, str(index or '?')) try: @@ -127,9 +128,9 @@ def _log_progress( def progress_bar( sequence: Iterable, - desc: Optional[Text] = None, - total: Optional[int] = None, - miniters: Optional[int] = None + desc: str | None = None, + total: int | None = None, + miniters: int | None = None ): """ Make and display a progress bar. diff --git a/pymove/utils/math.py b/pymove/utils/math.py index 14dd4b94..f03b3e80 100644 --- a/pymove/utils/math.py +++ b/pymove/utils/math.py @@ -11,12 +11,12 @@ interpolation """ +from __future__ import annotations import math -from typing import List, Optional, Tuple, Union -def is_number(value: Union[int, float, str]): +def is_number(value: int | float | str): """ Returns if value is numerical or not. @@ -49,7 +49,7 @@ def is_number(value: Union[int, float, str]): return True -def std(values_array: List[float]) -> float: +def std(values_array: list[float]) -> float: """ Compute standard deviation. @@ -77,12 +77,12 @@ def std(values_array: List[float]) -> float: """ size = len(values_array) mean = sum(values_array) / size - sum_sq = sum([(i - mean) * (i - mean) for i in values_array]) + sum_sq = sum((i - mean) * (i - mean) for i in values_array) return math.sqrt(sum_sq / size) -def avg_std(values_array: List[float]) -> Tuple[float, float]: +def avg_std(values_array: list[float]) -> tuple[float, float]: """ Compute the average of standard deviation. 
@@ -109,7 +109,7 @@ def avg_std(values_array: List[float]) -> Tuple[float, float]: return avg, std(values_array) -def std_sample(values_array: List[float]) -> float: +def std_sample(values_array: list[float]) -> float: """ Compute the standard deviation of sample. @@ -134,7 +134,7 @@ def std_sample(values_array: List[float]) -> float: return std(values_array) * math.sqrt(size / (size - 1)) -def avg_std_sample(values_array: List[float]) -> Tuple[float, float]: +def avg_std_sample(values_array: list[float]) -> tuple[float, float]: """ Compute the average of standard deviation of sample. @@ -162,7 +162,7 @@ def avg_std_sample(values_array: List[float]) -> Tuple[float, float]: def arrays_avg( - values_array: List[float], weights_array: Optional[List[float]] = None + values_array: list[float], weights_array: list[float] | None = None ) -> float: """ Computes the mean of the elements of the array. @@ -211,7 +211,7 @@ def arrays_avg( return result / n -def array_stats(values_array: List[float]) -> Tuple[float, float, int]: +def array_stats(values_array: list[float]) -> tuple[float, float, int]: """ Computes statistics about the array. 
diff --git a/pymove/utils/mem.py b/pymove/utils/mem.py index f67b26ae..c5194484 100644 --- a/pymove/utils/mem.py +++ b/pymove/utils/mem.py @@ -9,6 +9,7 @@ top_mem_vars """ +from __future__ import annotations import os import re @@ -16,7 +17,6 @@ from collections import deque from itertools import chain from sys import getsizeof -from typing import Dict, Text import numpy as np import psutil @@ -52,7 +52,7 @@ def reduce_mem_usage_automatic(df: DataFrame): dtype: object """ start_mem = df.memory_usage().sum() / 1024 ** 2 - logger.info('Memory usage of dataframe is {:.2f} MB'.format(start_mem)) + logger.info(f'Memory usage of dataframe is {start_mem:.2f} MB') for col in df.columns: col_type = df[col].dtype @@ -113,14 +113,14 @@ def reduce_mem_usage_automatic(df: DataFrame): df[col] = df[col].astype(np.float64) end_mem = df.memory_usage().sum() / 1024 ** 2 - logger.info('Memory usage after optimization is: {:.2f} MB'.format(end_mem)) + logger.info(f'Memory usage after optimization is: {end_mem:.2f} MB') logger.info( - 'Decreased by {:.1f} %'.format(100 * (start_mem - end_mem) / start_mem) + f'Decreased by {100 * (start_mem - end_mem) / start_mem:.1f} %' ) def total_size( - o: object, handlers: Dict = None, verbose: bool = True + o: object, handlers: dict = None, verbose: bool = True ) -> float: """ Calculates the approximate memory footprint of an given object. @@ -195,14 +195,14 @@ def sizeof(o): if verbose: - logger.info('Size in bytes: {}, Type: {}'.format(s, type(o))) + logger.info(f'Size in bytes: {s}, Type: {type(o)}') return s return sizeof(o) -def begin_operation(name: Text) -> Dict: +def begin_operation(name: str) -> dict: """ Gets the stats for the current operation. @@ -233,7 +233,7 @@ def begin_operation(name: Text) -> Dict: return {'process': process, 'init': init, 'start': start, 'name': name} -def end_operation(operation: Dict) -> Dict: +def end_operation(operation: dict) -> dict: """ Gets the time and memory usage of the operation. 
@@ -269,7 +269,7 @@ def end_operation(operation: Dict) -> Dict: } -def sizeof_fmt(mem_usage: float, suffix: Text = 'B') -> Text: +def sizeof_fmt(mem_usage: float, suffix: str = 'B') -> str: """ Returns the memory usage calculation of the last function. @@ -295,13 +295,13 @@ def sizeof_fmt(mem_usage: float, suffix: Text = 'B') -> Text: """ for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']: if abs(mem_usage) < 1024.0: - return '{:3.1f} {}{}'.format(mem_usage, unit, suffix) + return f'{mem_usage:3.1f} {unit}{suffix}' mem_usage /= 1024.0 return '{:.1f} {}{}'.format(mem_usage, 'Yi', suffix) def top_mem_vars( - variables: Dict, n: int = 10, hide_private=True + variables: dict, n: int = 10, hide_private=True ) -> DataFrame: """ Shows the sizes of the active variables. diff --git a/pymove/utils/trajectories.py b/pymove/utils/trajectories.py index 7b2036a9..bfa989b5 100644 --- a/pymove/utils/trajectories.py +++ b/pymove/utils/trajectories.py @@ -11,10 +11,10 @@ column_to_array """ - +from __future__ import annotations from itertools import chain -from typing import Any, Dict, List, Optional, Text, Tuple, Union +from typing import Any import numpy as np from numpy import ndarray @@ -29,11 +29,11 @@ def read_csv( filepath_or_buffer: FilePathOrBuffer, - latitude: Text = LATITUDE, - longitude: Text = LONGITUDE, - datetime: Text = DATETIME, - traj_id: Text = TRAJ_ID, - type_: Text = TYPE_PANDAS, + latitude: str = LATITUDE, + longitude: str = LONGITUDE, + datetime: str = DATETIME, + traj_id: str = TRAJ_ID, + type_: str = TYPE_PANDAS, n_partitions: int = 1, **kwargs ) -> MoveDataFrame: @@ -94,7 +94,7 @@ def read_csv( ) -def invert_dict(d: Dict) -> Dict: +def invert_dict(d: dict) -> dict: """ Inverts the key:value relation of a dictionary. @@ -119,10 +119,10 @@ def invert_dict(d: Dict) -> Dict: def flatten_dict( - d: Dict, - parent_key: Text = '', - sep: Text = '_' -) -> Dict: + d: dict, + parent_key: str = '', + sep: str = '_' +) -> dict: """ Flattens a nested dictionary. 
@@ -153,7 +153,7 @@ def flatten_dict( """ if not isinstance(d, dict): return {parent_key: d} - items: List[Tuple[Text, Any]] = [] + items: list[tuple[str, Any]] = [] for k, v in d.items(): new_key = f'{parent_key}{sep}{k}' if parent_key else k if isinstance(v, dict): @@ -163,7 +163,7 @@ def flatten_dict( return dict(items) -def flatten_columns(data: DataFrame, columns: List) -> DataFrame: +def flatten_columns(data: DataFrame, columns: list) -> DataFrame: """ Transforms columns containing dictionaries in individual columns. @@ -223,9 +223,9 @@ def flatten_columns(data: DataFrame, columns: List) -> DataFrame: def shift( - arr: Union[List, Series, ndarray], + arr: list | Series | ndarray, num: int, - fill_value: Optional[Any] = None + fill_value: Any | None = None ) -> ndarray: """ Shifts the elements of the given array by the number of periods specified. @@ -288,7 +288,7 @@ def shift( return result -def fill_list_with_new_values(original_list: List, new_list_values: List): +def fill_list_with_new_values(original_list: list, new_list_values: list): """ Copies elements from one list to another. @@ -314,7 +314,7 @@ def fill_list_with_new_values(original_list: List, new_list_values: List): original_list[:n] = new_list_values -def object_for_array(object_: Text) -> ndarray: +def object_for_array(object_: str) -> ndarray: """ Transforms an object into an array. @@ -346,7 +346,7 @@ def object_for_array(object_: Text) -> ndarray: return conv.astype('object_') -def column_to_array(data: DataFrame, column: Text) -> DataFrame: +def column_to_array(data: DataFrame, column: str) -> DataFrame: """ Transforms all columns values to list. 
diff --git a/pymove/utils/visual.py b/pymove/utils/visual.py index 80d0a36f..4f10bd5e 100644 --- a/pymove/utils/visual.py +++ b/pymove/utils/visual.py @@ -10,8 +10,9 @@ save_wkt """ +from __future__ import annotations -from typing import Sequence, Text, Tuple, Union +from typing import Sequence from branca.element import MacroElement, Template from folium import Map @@ -23,7 +24,7 @@ from pymove.utils.constants import COLORS, LATITUDE, LONGITUDE, TRAJ_ID -def add_map_legend(m: Map, title: Text, items: Union[Tuple, Sequence[Tuple]]): +def add_map_legend(m: Map, title: str, items: tuple | Sequence[tuple]): """ Adds a legend for a folium map. @@ -202,7 +203,7 @@ def add_map_legend(m: Map, title: Text, items: Union[Tuple, Sequence[Tuple]]): m.get_root().add_child(macro, name='map_legend') -def generate_color() -> Text: +def generate_color() -> str: """ Generates a random color. @@ -221,7 +222,7 @@ def generate_color() -> Text: return COLORS[randint(0, len(COLORS))] -def rgb(rgb_colors: Tuple[float, float, float]) -> Tuple[int, int, int]: +def rgb(rgb_colors: tuple[float, float, float]) -> tuple[int, int, int]: """ Return a tuple of integers, as used in AWT/Java plots. @@ -250,7 +251,7 @@ def rgb(rgb_colors: Tuple[float, float, float]) -> Tuple[int, int, int]: return int(red * 255), int(green * 255), int(blue * 255) -def hex_rgb(rgb_colors: Tuple[float, float, float]) -> Text: +def hex_rgb(rgb_colors: tuple[float, float, float]) -> str: """ Return a hex str, as used in Tk plots. @@ -276,7 +277,7 @@ def hex_rgb(rgb_colors: Tuple[float, float, float]) -> Text: return '#%02X%02X%02X' % rgb(rgb_colors) -def cmap_hex_color(cmap: ListedColormap, i: int) -> Text: +def cmap_hex_color(cmap: ListedColormap, i: int) -> str: """ Convert a Colormap to hex color. @@ -305,7 +306,7 @@ def cmap_hex_color(cmap: ListedColormap, i: int) -> Text: return rgb2hex(cmap(i)) -def get_cmap(cmap: Text) -> Colormap: +def get_cmap(cmap: str) -> Colormap: """ Returns a matplotlib colormap instance. 
@@ -329,7 +330,7 @@ def get_cmap(cmap: Text) -> Colormap: def save_wkt( - move_data: DataFrame, filename: Text, label_id: Text = TRAJ_ID + move_data: DataFrame, filename: str, label_id: str = TRAJ_ID ): """ Save a visualization in a map in a new file .wkt. @@ -370,7 +371,7 @@ def save_wkt( move_df = move_data[move_data[label_id] == id_] curr_str = '%s;LINESTRING(' % id_ curr_str += ','.join( - '%s %s' % (x[0], x[1]) + f'{x[0]} {x[1]}' for x in move_df[[LONGITUDE, LATITUDE]].values ) curr_str += ')\n' diff --git a/pymove/visualization/folium.py b/pymove/visualization/folium.py index 3e057d9b..07bf43b6 100644 --- a/pymove/visualization/folium.py +++ b/pymove/visualization/folium.py @@ -22,9 +22,10 @@ plot_traj_timestamp_geo_json """ +from __future__ import annotations from datetime import date -from typing import Any, Dict, List, Optional, Sequence, Text, Tuple, Union +from typing import Any, Sequence import folium import numpy as np @@ -38,11 +39,9 @@ DATE, DATETIME, DAY, - EVENT_ID, EVENT_POINT, HOUR, LATITUDE, - LINE_COLOR, LONGITUDE, PERIOD, POI_POINT, @@ -50,7 +49,6 @@ STOP, TILES, TRAJ_ID, - UID, USER_POINT, ) from pymove.utils.datetime import str_to_datetime @@ -60,12 +58,12 @@ def save_map( move_data: DataFrame, - filename: Text, - tiles: Text = TILES[0], - label_id: Text = TRAJ_ID, - cmap: Text = 'Set1', + filename: str, + tiles: str = TILES[0], + label_id: str = TRAJ_ID, + cmap: str = 'Set1', return_map: bool = False -) -> Optional[Map]: +) -> Map | None: """ Save a visualization in a map in a new file. 
@@ -130,9 +128,9 @@ def save_map( def create_base_map( move_data: DataFrame, - lat_origin: Optional[float] = None, - lon_origin: Optional[float] = None, - tile: Text = TILES[0], + lat_origin: float | None = None, + lon_origin: float | None = None, + tile: str = TILES[0], default_zoom_start: float = 12, ) -> Map: """ @@ -182,15 +180,15 @@ def create_base_map( def heatmap( move_data: DataFrame, - n_rows: Optional[int] = None, - lat_origin: Optional[float] = None, - lon_origin: Optional[float] = None, + n_rows: int | None = None, + lat_origin: float | None = None, + lon_origin: float | None = None, zoom_start: float = 12, radius: float = 8, - base_map: Optional[Map] = None, - tile: Text = TILES[0], + base_map: Map | None = None, + tile: str = TILES[0], save_as_html: bool = False, - filename: Text = 'heatmap.html', + filename: str = 'heatmap.html', ) -> Map: """ Generate visualization of Heat Map using folium plugin. @@ -267,17 +265,17 @@ def heatmap( def heatmap_with_time( move_data: DataFrame, - n_rows: Optional[int] = None, - lat_origin: Optional[float] = None, - lon_origin: Optional[float] = None, + n_rows: int | None = None, + lat_origin: float | None = None, + lon_origin: float | None = None, zoom_start: float = 12, radius: float = 8, min_opacity: float = 0.5, max_opacity: float = 0.8, - base_map: Optional[Map] = None, - tile: Text = TILES[0], + base_map: Map | None = None, + tile: str = TILES[0], save_as_html: bool = False, - filename: Text = 'heatmap_time.html', + filename: str = 'heatmap_time.html', ) -> Map: """ Generate visualization of Heat Map using folium plugin. 
@@ -371,14 +369,14 @@ def heatmap_with_time( def cluster( move_data: DataFrame, - n_rows: Optional[int] = None, - lat_origin: Optional[float] = None, - lon_origin: Optional[float] = None, + n_rows: int | None = None, + lat_origin: float | None = None, + lon_origin: float | None = None, zoom_start: float = 12, - base_map: Optional[Map] = None, - tile: Text = TILES[0], + base_map: Map | None = None, + tile: str = TILES[0], save_as_html: bool = False, - filename: Text = 'cluster.html', + filename: str = 'cluster.html', ) -> Map: """ Generate visualization of Heat Map using folium plugin. @@ -461,14 +459,14 @@ def cluster( def faster_cluster( move_data: DataFrame, - n_rows: Optional[int] = None, - lat_origin: Optional[float] = None, - lon_origin: Optional[float] = None, + n_rows: int | None = None, + lat_origin: float | None = None, + lon_origin: float | None = None, zoom_start: float = 12, - base_map: Optional[Map] = None, - tile: Text = TILES[0], + base_map: Map | None = None, + tile: str = TILES[0], save_as_html: bool = False, - filename: Text = 'faster_cluster.html', + filename: str = 'faster_cluster.html', ) -> Map: """ Generate visualization of Heat Map using folium plugin. @@ -546,14 +544,14 @@ def faster_cluster( def plot_markers( move_data: DataFrame, - n_rows: Optional[int] = None, - lat_origin: Optional[float] = None, - lon_origin: Optional[float] = None, + n_rows: int | None = None, + lat_origin: float | None = None, + lon_origin: float | None = None, zoom_start: float = 12, - base_map: Optional[Map] = None, - tile: Text = TILES[0], + base_map: Map | None = None, + tile: str = TILES[0], save_as_html: bool = False, - filename: Text = 'markers.html', + filename: str = 'markers.html', ) -> Map: """ Generate visualization of Heat Map using folium plugin. 
@@ -646,11 +644,11 @@ def plot_markers( def _filter_and_generate_colors( move_data: DataFrame, - id_: Optional[int] = None, - n_rows: Optional[int] = None, - color: Optional[Union[Text, List[Text]]] = None, - color_by_id: Optional[Dict] = None -) -> Tuple[DataFrame, List[Tuple[Any, Any]]]: + id_: int | None = None, + n_rows: int | None = None, + color: str | list[str] | None = None, + color_by_id: dict | None = None +) -> tuple[DataFrame, list[tuple[Any, Any]]]: """ Filters the dataframe and generate colors for folium map. @@ -744,7 +742,7 @@ def _filter_and_generate_colors( def _filter_generated_feature( - move_data: DataFrame, feature: Text, values: Any + move_data: DataFrame, feature: str, values: Any ) -> DataFrame: """ Filters the values from the dataframe. @@ -795,8 +793,8 @@ def _filter_generated_feature( def _add_begin_end_markers_to_map( move_data: DataFrame, base_map: Map, - color: Optional[Text] = None, - _id: Optional[int] = None + color: str | None = None, + _id: int | None = None ): """ Adds markers to the beggining and end of a trajectory. @@ -857,11 +855,11 @@ def _add_begin_end_markers_to_map( def _add_trajectories_to_map( move_data: DataFrame, - items: Sequence[Tuple], + items: Sequence[tuple], base_map: Map, legend: bool = True, save_as_html: bool = True, - filename: Text = 'map.html', + filename: str = 'map.html', ): """ Adds a trajectory to a folium map with begin and end markers. 
@@ -914,17 +912,17 @@ def _add_trajectories_to_map( def plot_trajectories( move_data: DataFrame, - n_rows: Optional[int] = None, - lat_origin: Optional[float] = None, - lon_origin: Optional[float] = None, + n_rows: int | None = None, + lat_origin: float | None = None, + lon_origin: float | None = None, zoom_start: float = 12, legend: bool = True, - base_map: Optional[Map] = None, - tile: Text = TILES[0], + base_map: Map | None = None, + tile: str = TILES[0], save_as_html: bool = False, - color: Optional[Union[Text, List[Text]]] = None, - color_by_id: Optional[Dict] = None, - filename: Text = 'plot_trajectories.html', + color: str | list[str] | None = None, + color_by_id: dict | None = None, + filename: str = 'plot_trajectories.html', ) -> Map: """ Generate visualization of all trajectories with folium. @@ -1003,16 +1001,16 @@ def plot_trajectories( def plot_trajectory_by_id( move_data: DataFrame, id_: int, - n_rows: Optional[int] = None, - lat_origin: Optional[float] = None, - lon_origin: Optional[float] = None, + n_rows: int | None = None, + lat_origin: float | None = None, + lon_origin: float | None = None, zoom_start: float = 12, legend: bool = True, - base_map: Optional[Map] = None, - tile: Text = TILES[0], + base_map: Map | None = None, + tile: str = TILES[0], save_as_html: bool = False, - color: Optional[Union[Text, List[Text]]] = None, - filename: Text = 'plot_trajectories.html', + color: str | list[str] | None = None, + filename: str = 'plot_trajectories.html', ) -> Map: """ Generate visualization of all trajectories with folium. 
@@ -1091,19 +1089,19 @@ def plot_trajectory_by_id( def plot_trajectory_by_period( move_data: PandasMoveDataFrame, - period: Text, - id_: Optional[int] = None, - n_rows: Optional[int] = None, - lat_origin: Optional[float] = None, - lon_origin: Optional[float] = None, + period: str, + id_: int | None = None, + n_rows: int | None = None, + lat_origin: float | None = None, + lon_origin: float | None = None, zoom_start: float = 12, legend: bool = True, - base_map: Optional[Map] = None, - tile: Text = TILES[0], + base_map: Map | None = None, + tile: str = TILES[0], save_as_html: bool = False, - color: Optional[Union[Text, List[Text]]] = None, - color_by_id: Optional[Dict] = None, - filename: Text = 'plot_trajectories_by_period.html', + color: str | list[str] | None = None, + color_by_id: dict | None = None, + filename: str = 'plot_trajectories_by_period.html', ) -> Map: """ Generate visualization of all trajectories with folium. @@ -1196,19 +1194,19 @@ def plot_trajectory_by_period( def plot_trajectory_by_day_week( move_data: PandasMoveDataFrame, - day_week: Text, - id_: Optional[int] = None, - n_rows: Optional[int] = None, - lat_origin: Optional[float] = None, - lon_origin: Optional[float] = None, + day_week: str, + id_: int | None = None, + n_rows: int | None = None, + lat_origin: float | None = None, + lon_origin: float | None = None, zoom_start: float = 12, legend: bool = True, - base_map: Optional[Map] = None, - tile: Text = TILES[0], + base_map: Map | None = None, + tile: str = TILES[0], save_as_html: bool = False, - color: Optional[Union[Text, List[Text]]] = None, - color_by_id: Optional[Dict] = None, - filename: Text = 'plot_trajectories_by_day_week.html', + color: str | list[str] | None = None, + color_by_id: dict | None = None, + filename: str = 'plot_trajectories_by_day_week.html', ) -> Map: """ Generate visualization of all trajectories with folium. 
@@ -1301,20 +1299,20 @@ def plot_trajectory_by_day_week( def plot_trajectory_by_date( move_data: PandasMoveDataFrame, - start_date: Union[Text, date], - end_date: Union[Text, date], - id_: Optional[int] = None, - n_rows: Optional[int] = None, - lat_origin: Optional[float] = None, - lon_origin: Optional[float] = None, + start_date: str | date, + end_date: str | date, + id_: int | None = None, + n_rows: int | None = None, + lat_origin: float | None = None, + lon_origin: float | None = None, zoom_start: float = 12, legend: bool = True, - base_map: Optional[Map] = None, - tile: Text = TILES[0], + base_map: Map | None = None, + tile: str = TILES[0], save_as_html: bool = False, - color: Optional[Union[Text, List[Text]]] = None, - color_by_id: Optional[Dict] = None, - filename: Text = 'plot_trajectories_by_date.html', + color: str | list[str] | None = None, + color_by_id: dict | None = None, + filename: str = 'plot_trajectories_by_date.html', ) -> Map: """ Generate visualization of all trajectories with folium. @@ -1419,20 +1417,20 @@ def plot_trajectory_by_date( def plot_trajectory_by_hour( move_data: PandasMoveDataFrame, - start_hour: Text, - end_hour: Text, - id_: Optional[int] = None, - n_rows: Optional[int] = None, - lat_origin: Optional[float] = None, - lon_origin: Optional[float] = None, + start_hour: str, + end_hour: str, + id_: int | None = None, + n_rows: int | None = None, + lat_origin: float | None = None, + lon_origin: float | None = None, zoom_start: float = 12, legend: bool = True, - base_map: Optional[Map] = None, - tile: Text = TILES[0], + base_map: Map | None = None, + tile: str = TILES[0], save_as_html: bool = False, - color: Optional[Union[Text, List[Text]]] = None, - color_by_id: Optional[Dict] = None, - filename: Text = 'plot_trajectories_by_hour.html', + color: str | list[str] | None = None, + color_by_id: dict | None = None, + filename: str = 'plot_trajectories_by_hour.html', ) -> Map: """ Generate visualization of all trajectories with folium. 
@@ -1529,17 +1527,17 @@ def plot_stops( move_data: PandasMoveDataFrame, radius: float = 0, weight: float = 3, - id_: Optional[int] = None, - n_rows: Optional[int] = None, - lat_origin: Optional[float] = None, - lon_origin: Optional[float] = None, + id_: int | None = None, + n_rows: int | None = None, + lat_origin: float | None = None, + lon_origin: float | None = None, zoom_start: float = 12, legend: bool = True, - base_map: Optional[Map] = None, - tile: Text = TILES[0], + base_map: Map | None = None, + tile: str = TILES[0], save_as_html: bool = False, - color: Optional[Union[Text, List[Text]]] = None, - filename: Text = 'plot_stops.html', + color: str | list[str] | None = None, + filename: str = 'plot_stops.html', ) -> Map: """ Generate visualization of all trajectories with folium. @@ -1649,12 +1647,12 @@ def plot_stops( def plot_bbox( - bbox_tuple: Tuple[float, float, float, float], - base_map: Optional[Map] = None, - tiles: Text = TILES[0], - color: Text = 'red', + bbox_tuple: tuple[float, float, float, float], + base_map: Map | None = None, + tiles: str = TILES[0], + color: str = 'red', save_as_html: bool = False, - filename: Text = 'bbox.html' + filename: str = 'bbox.html' ) -> Map: """ Plots a bbox using Folium. @@ -1707,7 +1705,7 @@ def plot_bbox( return base_map -def _format_tags(line: Union[List, Dict], slice_: List) -> Text: +def _format_tags(line: list | dict, slice_: list) -> str: """ Create or format tags. @@ -1742,17 +1740,17 @@ def _format_tags(line: Union[List, Dict], slice_: List) -> Text: >>> ) lat: 39.984094
    lon: 116.319236
    datetime: 2008-10-23 05:53:05
    id: 1 """ - map_formated_tags = map(lambda tag: '{}: {}'.format(tag, line[tag]), slice_) + map_formated_tags = map(lambda tag: f'{tag}: {line[tag]}', slice_) return '
    '.join(map_formated_tags) def _circle_maker( iter_tuple: DataFrame, - user_lat: Text, - user_lon: Text, - slice_tags: List, - user_point: Text, + user_lat: str, + user_lon: str, + slice_tags: list, + user_point: str, radius: float, map_: Map ): @@ -1814,15 +1812,15 @@ def _circle_maker( def plot_points( move_data: DataFrame, - user_lat: Text = LATITUDE, - user_lon: Text = LONGITUDE, - user_point: Text = USER_POINT, + user_lat: str = LATITUDE, + user_lon: str = LONGITUDE, + user_point: str = USER_POINT, radius: float = 2, - base_map: Optional[Map] = None, - slice_tags: Optional[List] = None, - tiles: Text = TILES[0], + base_map: Map | None = None, + slice_tags: list | None = None, + tiles: str = TILES[0], save_as_html: bool = False, - filename: Text = 'points.html' + filename: str = 'points.html' ) -> Map: """ Generates a folium map with the trajectories plots and a point. @@ -1901,15 +1899,15 @@ def plot_points( def plot_poi( move_data: DataFrame, - poi_lat: Text = LATITUDE, - poi_lon: Text = LONGITUDE, - poi_point: Text = POI_POINT, + poi_lat: str = LATITUDE, + poi_lon: str = LONGITUDE, + poi_point: str = POI_POINT, radius: float = 2, - base_map: Optional[Map] = None, - slice_tags: Optional[List] = None, - tiles: Text = TILES[0], + base_map: Map | None = None, + slice_tags: list | None = None, + tiles: str = TILES[0], save_as_html: bool = False, - filename: Text = 'pois.html' + filename: str = 'pois.html' ) -> Map: """ Receives a MoveDataFrame and returns a folium map with poi points. 
@@ -1970,15 +1968,15 @@ def plot_poi( def plot_event( move_data: DataFrame, - event_lat: Text = LATITUDE, - event_lon: Text = LONGITUDE, - event_point: Text = EVENT_POINT, + event_lat: str = LATITUDE, + event_lon: str = LONGITUDE, + event_point: str = EVENT_POINT, radius: float = 2, - base_map: Optional[Map] = None, - slice_tags: Optional[List] = None, - tiles: Text = TILES[0], + base_map: Map | None = None, + slice_tags: list | None = None, + tiles: str = TILES[0], save_as_html: bool = False, - filename: Text = 'events.html' + filename: str = 'events.html' ) -> Map: """ Receives a MoveDataFrame and returns a folium map with events. @@ -2036,10 +2034,10 @@ def plot_event( def _create_geojson_features_line( move_data: DataFrame, - label_lat: Text = LATITUDE, - label_lon: Text = LONGITUDE, - label_datetime: Text = DATETIME -) -> List: + label_lat: str = LATITUDE, + label_lon: str = LONGITUDE, + label_datetime: str = DATETIME +) -> list: """ Create geojson features. @@ -2155,12 +2153,12 @@ def _create_geojson_features_line( def plot_traj_timestamp_geo_json( move_data: DataFrame, - label_lat: Text = LATITUDE, - label_lon: Text = LONGITUDE, - label_datetime: Text = DATETIME, - tiles: Text = TILES[0], + label_lat: str = LATITUDE, + label_lon: str = LONGITUDE, + label_datetime: str = DATETIME, + tiles: str = TILES[0], save_as_html: bool = False, - filename: Text = 'events.html' + filename: str = 'events.html' ) -> Map: """ Plot trajectories wit geo_json. 
diff --git a/pymove/visualization/matplotlib.py b/pymove/visualization/matplotlib.py index ec45a4dd..e72de5ba 100644 --- a/pymove/visualization/matplotlib.py +++ b/pymove/visualization/matplotlib.py @@ -4,14 +4,16 @@ show_object_id_by_date, plot_trajectories, plot_trajectory_by_id, +plot_grid_polygons, plot_all_features plot_coords, plot_bounds, plot_line """ +from __future__ import annotations -from typing import TYPE_CHECKING, Any, Callable, List, Optional, Text, Tuple, Union +from typing import TYPE_CHECKING, Any, Callable import matplotlib.pyplot as plt from matplotlib.pyplot import axes, figure @@ -19,6 +21,7 @@ from shapely.geometry import LineString, MultiLineString from shapely.geometry.base import BaseGeometry +from pymove.core.grid import Grid from pymove.utils.constants import ( DATE, DAY, @@ -26,7 +29,7 @@ LATITUDE, LONGITUDE, PERIOD, - TID, + POLYGON, TRAJ_ID, ) @@ -36,13 +39,13 @@ def show_object_id_by_date( - move_data: Union['PandasMoveDataFrame', 'DaskMoveDataFrame'], - kind: Optional[List] = None, - figsize: Tuple[float, float] = (21, 9), + move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame', + kind: list | None = None, + figsize: tuple[float, float] = (21, 9), return_fig: bool = False, save_fig: bool = False, - name: Text = 'shot_points_by_date.png', -) -> Optional[figure]: + name: str = 'shot_points_by_date.png', +) -> figure | None: """ Generates four visualizations based on datetime feature. @@ -128,13 +131,13 @@ def show_object_id_by_date( def plot_trajectories( move_data: DataFrame, - markers: Text = 'o', + markers: str = 'o', markersize: float = 12, - figsize: Tuple[float, float] = (10, 10), + figsize: tuple[float, float] = (10, 10), return_fig: bool = False, save_fig: bool = False, - name: Text = 'trajectories.png', -) -> Optional[figure]: + name: str = 'trajectories.png', +) -> figure | None: """ Generate a visualization that show trajectories. 
@@ -193,17 +196,17 @@ def plot_trajectories( def plot_trajectory_by_id( move_data: DataFrame, - id_: Union[int, Text], - label: Text = TID, - feature: Optional[Text] = None, - value: Optional[Any] = None, + id_: int | str, + label: str = TRAJ_ID, + feature: str | None = None, + value: Any | None = None, linewidth: float = 3, markersize: float = 20, - figsize: Tuple[float, float] = (10, 10), + figsize: tuple[float, float] = (10, 10), return_fig: bool = False, save_fig: bool = False, - name: Optional[Text] = None, -) -> Optional[figure]: + name: str | None = None, +) -> figure | None: """ Generate a visualization that shows a trajectory with the specified tid. @@ -302,14 +305,83 @@ def plot_trajectory_by_id( return fig +def plot_grid_polygons( + data: DataFrame, + grid: Grid | None = None, + markersize: float = 10, + linewidth: float = 2, + figsize: tuple[int, int] = (10, 10), + return_fig: bool = False, + save_fig: bool = False, + name: str = 'grid.png', +) -> figure | None: + """ + Generate a visualization with grid polygons. 
+ + Parameters + ---------- + data : DataFrame + Input trajectory data + markersize : float, optional + Represents visualization size marker, by default 10 + linewidth : float, optional + Represents visualization size line, by default 2 + figsize : tuple(int, int), optional + Represents the size (float: width, float: height) of a figure, + by default (10, 10) + return_fig : bool, optional + Represents whether or not to save the generated picture, by default False + save_fig : bool, optional + Wether to save the figure, by default False + name : str, optional + Represents name of a file, by default 'grid.png' + + Returns + ------- + figure + The generated picture or None + + Raises + ------ + If the dataframe does not contains the POLYGON feature + IndexError + If there is no user with the id passed + + """ + if POLYGON not in data: + if grid is None: + raise KeyError('POLYGON feature not in dataframe') + data = grid.create_all_polygons_to_all_point_on_grid(data) + + data = data.copy() + + data.dropna(subset=[POLYGON], inplace=True) + + fig = plt.figure(figsize=figsize) + + for _, row in data.iterrows(): + xs, ys = row[POLYGON].exterior.xy + plt.plot(ys, xs, 'g', linewidth=linewidth, markersize=markersize) + xs_start, ys_start = data.iloc[0][POLYGON].exterior.xy + xs_end, ys_end = data.iloc[-1][POLYGON].exterior.xy + plt.plot(ys_start, xs_start, 'bo', markersize=markersize * 1.5) + plt.plot(ys_end, xs_end, 'bX', markersize=markersize * 1.5) + + if save_fig: + plt.savefig(fname=name) + + if return_fig: + return fig + + def plot_all_features( move_data: DataFrame, dtype: Callable = float, - figsize: Tuple[float, float] = (21, 15), + figsize: tuple[float, float] = (21, 15), return_fig: bool = False, save_fig: bool = False, - name: Text = 'features.png', -) -> Optional[figure]: + name: str = 'features.png', +) -> figure | None: """ Generate a visualization for each columns that type is equal dtype. 
@@ -369,7 +441,7 @@ def plot_all_features( return fig -def plot_coords(ax: axes, ob: BaseGeometry, color: Text = 'r'): +def plot_coords(ax: axes, ob: BaseGeometry, color: str = 'r'): """ Plot the coordinates of each point of the object in a 2D chart. @@ -394,7 +466,7 @@ def plot_coords(ax: axes, ob: BaseGeometry, color: Text = 'r'): ax.plot(x, y, 'o', color=color, zorder=1) -def plot_bounds(ax: axes, ob: Union[LineString, MultiLineString], color='b'): +def plot_bounds(ax: axes, ob: LineString | MultiLineString, color='b'): """ Plot the limits of geometric object. @@ -422,10 +494,10 @@ def plot_bounds(ax: axes, ob: Union[LineString, MultiLineString], color='b'): def plot_line( ax: axes, ob: LineString, - color: Text = 'r', + color: str = 'r', alpha: float = 0.7, linewidth: float = 3, - solid_capstyle: Text = 'round', + solid_capstyle: str = 'round', zorder: float = 2 ): """ From eab12c22b1412393a9e25a79f1580597a9e76a12 Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 13 Jul 2021 17:57:26 -0300 Subject: [PATCH 49/56] added multi python version testing --- .github/workflows/lint_and_test.yml | 53 ++++++++++++----------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml index e87807ea..4b1eaa4a 100644 --- a/.github/workflows/lint_and_test.yml +++ b/.github/workflows/lint_and_test.yml @@ -2,37 +2,26 @@ name: Lint and Test on: [push] jobs: - lint: - name: Code Linting + lint-test: + name: Lint and Test runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.7, 3.8, 3.9,.3.10] steps: - - uses: actions/checkout@main - - name: Set up Python 3.7 - uses: actions/setup-python@main - with: - python-version: 3.7 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - make dev - - name: Lint - working-directory: ${{ github.workspace }} - run: | - make lint - test: - name: Code Testing - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@main - - 
name: Set up Python 3.7 - uses: actions/setup-python@main - with: - python-version: 3.7 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - make dev - - name: Test - working-directory: ${{ github.workspace }} - run: | - make test + - uses: actions/checkout@main + - uses: actions/setup-python@main + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + make dev + - name: Check code style + working-directory: ${{ github.workspace }} + run: | + make lint + - name: Runs unit tests + working-directory: ${{ github.workspace }} + run: | + make test From ac9298a877ee298ca5efe3a76f403fba10e9e95b Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 13 Jul 2021 17:58:26 -0300 Subject: [PATCH 50/56] fix typo in version --- .github/workflows/lint_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml index 4b1eaa4a..ec478ce3 100644 --- a/.github/workflows/lint_and_test.yml +++ b/.github/workflows/lint_and_test.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9,.3.10] + python-version: [3.7, 3.8, 3.9, 3.10] steps: - uses: actions/checkout@main - uses: actions/setup-python@main From 7943f2f124fe26790e140a32ba323cee5d285bca Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 13 Jul 2021 18:00:45 -0300 Subject: [PATCH 51/56] changed versions to string --- .github/workflows/lint_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml index ec478ce3..d0cb62ed 100644 --- a/.github/workflows/lint_and_test.yml +++ b/.github/workflows/lint_and_test.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9, 3.10] + python-version: ['3.7', '3.8', '3.9', '3.10'] steps: - uses: actions/checkout@main - uses: 
actions/setup-python@main From 2350cdfd90aba71cfe659bba75ce218395ca9109 Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 13 Jul 2021 18:02:08 -0300 Subject: [PATCH 52/56] removed python 3.10 --- .github/workflows/lint_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml index d0cb62ed..ce999458 100644 --- a/.github/workflows/lint_and_test.yml +++ b/.github/workflows/lint_and_test.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.7', '3.8', '3.9', '3.10'] + python-version: ['3.7', '3.8', '3.9'] steps: - uses: actions/checkout@main - uses: actions/setup-python@main From 114ac309250d5aace2db293357f8cbdbe18e825b Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 13 Jul 2021 18:36:03 -0300 Subject: [PATCH 53/56] changed tagging from bump2version, removed warning on last_operation --- pymove/core/pandas.py | 3 +-- setup.cfg | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pymove/core/pandas.py b/pymove/core/pandas.py index e5da791d..13a25eaf 100644 --- a/pymove/core/pandas.py +++ b/pymove/core/pandas.py @@ -116,9 +116,8 @@ def __init__( MoveDataFrame.validate_move_data_frame(tdf) super().__init__(tdf) self._type = TYPE_PANDAS - self.last_operation: dict = dict() + self.last_operation: dict = None # type: ignore[assignment] else: - raise KeyError( 'Couldn\'t instantiate MoveDataFrame because data has missing columns.' 
) diff --git a/setup.cfg b/setup.cfg index 6fe2d13e..e38f6dae 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,8 +1,6 @@ [bumpversion] current_version = 2.7.2 -allow_dirty = True -tag_name = version-{new_version} -tag = True +tag_name = {new_version} commit = True [bumpversion:file:pymove/__init__.py] @@ -20,6 +18,7 @@ docstring-convention = numpy [mypy] ignore_missing_imports = True no_warn_no_return = True +show_error_codes = True files = pymove [isort] From 6ffbae8dd863e41c09693372dc790a998eedd481 Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 13 Jul 2021 20:31:13 -0300 Subject: [PATCH 54/56] updated notebooks --- notebooks/01_Exploring_MoveDataFrame.ipynb | 1178 +++--- notebooks/02_Exploring_Preprossessing.ipynb | 1598 ++++----- notebooks/03_Exploring_Visualization.ipynb | 498 ++- notebooks/04_Exploring_Grid.ipynb | 319 +- notebooks/05_Exploring_Utils.ipynb | 1738 +++++---- notebooks/06_Exploring_Integrations.ipynb | 3574 +++++++++---------- notebooks/07_Exploring_Query.ipynb | 666 ++-- notebooks/08_Exploring_Semantic.ipynb | 488 +-- pymove/core/dataframe.py | 13 +- pymove/core/pandas.py | 2 +- pymove/utils/integration.py | 11 - pymove/utils/trajectories.py | 2 +- pymove/visualization/matplotlib.py | 2 +- 13 files changed, 4950 insertions(+), 5139 deletions(-) diff --git a/notebooks/01_Exploring_MoveDataFrame.ipynb b/notebooks/01_Exploring_MoveDataFrame.ipynb index 9340cd34..48b5bb55 100644 --- a/notebooks/01_Exploring_MoveDataFrame.ipynb +++ b/notebooks/01_Exploring_MoveDataFrame.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, "source": [ "# 01 - Exploring MoveDataFrame\n", "\n", @@ -34,32 +33,53 @@ "## Creating a MoveDataFrame\n", "\n", "A MoveDataFrame can be created by passing a Pandas DataFrame, a list, dict or even reading a file. 
Look:" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 1, - "metadata": {}, - "outputs": [], "source": [ "import pymove as pm\n", "from pymove import MoveDataFrame" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### From a list" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "source": [ + "list_data = [\n", + " [39.984094, 116.319236, '2008-10-23 05:53:05', 1],\n", + " [39.984198, 116.319322, '2008-10-23 05:53:06', 1],\n", + " [39.984224, 116.319402, '2008-10-23 05:53:11', 1],\n", + " [39.984224, 116.319402, '2008-10-23 05:53:11', 1],\n", + " [39.984224, 116.319402, '2008-10-23 05:53:11', 1],\n", + " [39.984224, 116.319402, '2008-10-23 05:53:11', 1]\n", + "]\n", + "move_df = MoveDataFrame(data=list_data, latitude=\"lat\", longitude=\"lon\", datetime=\"datetime\", traj_id=\"id\")\n", + "move_df.head()" + ], "outputs": [ { + "output_type": "execute_result", "data": { + "text/plain": [ + " lat lon datetime id\n", + "0 39.984094 116.319236 2008-10-23 05:53:05 1\n", + "1 39.984198 116.319322 2008-10-23 05:53:06 1\n", + "2 39.984224 116.319402 2008-10-23 05:53:11 1\n", + "3 39.984224 116.319402 2008-10-23 05:53:11 1\n", + "4 39.984224 116.319402 2008-10-23 05:53:11 1" + ], "text/html": [ "
    \n", " + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    unique_idosmidelement_typeamenityfeegeometrycuisinenamename:enatm...alt_name_1not:nameareawaystypename:janame:kolonlattype_poi
    0node/269492188269492188nodetoiletsnoPOINT (116.26750 39.98087)NaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaN116.26750439.980869toilets
    1node/274942287274942287nodetoiletsNaNPOINT (116.27358 39.99664)NaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaN116.27357939.996640toilets
    2node/276320137276320137nodefast_foodNaNPOINT (116.33756 39.97541)chinese永和大王NaNNaN...NaNNaNNaNNaNNaNNaNNaN116.33755739.975411fast_food
    3node/276320142276320142nodemassageNaNPOINT (116.33751 39.97546)NaNFootmassage 富橋NaNNaN...NaNNaNNaNNaNNaNNaNNaN116.33751039.975463massage
    4node/286242547286242547nodetoiletsNaNPOINT (116.19982 40.00670)NaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaN116.19982240.006700toilets
    5node/286246121286246121nodewaste_basketNaNPOINT (116.20290 39.99787)NaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaN116.20290239.997869waste_basket
    6node/290600874290600874nodecafeNaNPOINT (116.32900 39.99117)NaN迷你站奶茶专门店Mini Station MilkteaNaN...NaNNaNNaNNaNNaNNaNNaN116.32899739.991167cafe
    7node/297407376297407376noderestaurantNaNPOINT (116.33981 39.97537)NaN沸腾渔乡NaNNaN...NaNNaNNaNNaNNaNNaNNaN116.33981039.975369restaurant
    8node/297407444297407444nodebankNaNPOINT (116.33826 39.97546)NaN招商银行China Merchants Bankyes...NaNNaNNaNNaNNaNNaNNaN116.33826039.975462bank
    9node/312152376312152376noderestaurantNaNPOINT (116.32766 39.99113)NaN永和大王Yonghe KingNaN...NaNNaNNaNNaNNaNNaNNaN116.32766039.991132restaurant
    +

    10 rows × 121 columns

    +
    - VBox(children=(HTML(value=''), IntProgress(value=0, max=5000))) + + +.. code:: ipython3 + + df_6 = move_df.copy() + df_6 = it.join_with_pois_by_category(df_6, POIs_5, label_category='amenity', label_id='name') @@ -770,8 +1024,6 @@ Executing the function dist_massage id_waste_basket dist_waste_basket - id_parking - dist_parking id_cafe dist_cafe id_restaurant @@ -787,21 +1039,19 @@ Executing the function 116.319236 2008-10-23 05:53:05 1 - 274942287 + NaN 4132.229067 - 276320137 + 永和大王 1835.502157 - 276320142 + Footmassage 富橋 1829.070918 - 286246121 + NaN 10028.323311 - 286251975 - 9867.997250 - 290600874 + 迷你站奶茶专门店 1144.603484 - 297407376 - 2003.585998 - 297407444 + 永和大王 + 1061.807427 + 招商银行 1883.831094 @@ -810,21 +1060,19 @@ Executing the function 116.319322 2008-10-23 05:53:06 1 - 274942287 + NaN 4135.240296 - 276320137 + 永和大王 1835.403414 - 276320142 + Footmassage 富橋 1828.951254 - 286246121 + NaN 10033.797904 - 286251975 - 9874.072277 - 290600874 + 迷你站奶茶专门店 1131.338544 - 297407376 - 2002.819358 - 297407444 + 永和大王 + 1048.334810 + 招商银行 1883.466601 @@ -833,21 +1081,19 @@ Executing the function 116.319402 2008-10-23 05:53:11 1 - 274942287 + NaN 4140.698090 - 276320137 + 永和大王 1831.182086 - 276320142 + Footmassage 富橋 1824.720741 - 286246121 + NaN 10040.095434 - 286251975 - 9880.550663 - 290600874 + 迷你站奶茶专门店 1124.395459 - 297407376 - 1998.303264 - 297407444 + 永和大王 + 1041.594793 + 招商银行 1879.127020 @@ -856,21 +1102,19 @@ Executing the function 116.319389 2008-10-23 05:53:16 1 - 274942287 + NaN 4140.136625 - 276320137 + 永和大王 1831.345213 - 276320142 + Footmassage 富橋 1824.886604 - 286246121 + NaN 10039.220172 - 286251975 - 9879.599237 - 290600874 + 迷你站奶茶专门店 1126.193301 - 297407376 - 1998.555890 - 297407444 + 永和大王 + 1043.408891 + 招商银行 1879.325712 @@ -879,21 +1123,19 @@ Executing the function 116.319422 2008-10-23 05:53:21 1 - 274942287 + NaN 4142.564150 - 276320137 + 永和大王 1829.326076 - 276320142 + Footmassage 富橋 1822.864349 - 286246121 + NaN 10041.897836 - 286251975 - 
9882.326278 - 290600874 + 迷你站奶茶专门店 1123.692580 - 297407376 - 1996.436934 - 297407444 + 永和大王 + 1041.019464 + 招商银行 1877.266370 @@ -902,21 +1144,19 @@ Executing the function 116.319865 2008-10-23 05:53:23 1 - 274942287 + NaN 4160.348133 - 276320137 + 永和大王 1827.992513 - 276320142 + Footmassage 富橋 1821.434719 - 286246121 + NaN 10071.059512 - 286251975 - 9914.343371 - 290600874 + 迷你站奶茶专门店 1058.680139 - 297407376 - 1991.704195 - 297407444 + 永和大王 + 975.127648 + 招商银行 1874.593280 @@ -925,21 +1165,19 @@ Executing the function 116.319810 2008-10-23 05:53:28 1 - 274942287 + NaN 4157.187813 - 276320137 + 永和大王 1829.602658 - 276320142 + Footmassage 富橋 1823.053098 - 286246121 + NaN 10067.008973 - 286251975 - 9910.073317 - 290600874 + 迷你站奶茶专门店 1064.838599 - 297407376 - 1993.624392 - 297407444 + 永和大王 + 981.250366 + 招商银行 1876.325343 @@ -948,21 +1186,19 @@ Executing the function 116.319773 2008-10-23 05:53:33 1 - 274942287 + NaN 4156.022778 - 276320137 + 永和大王 1829.027475 - 276320142 + Footmassage 富橋 1822.486938 - 286246121 + NaN 10064.722571 - 286251975 - 9907.496404 - 290600874 + 迷你站奶茶专门店 1071.002908 - 297407376 - 1993.387313 - 297407444 + 永和大王 + 987.550029 + 招商银行 1875.882508 @@ -971,21 +1207,19 @@ Executing the function 116.319732 2008-10-23 05:53:38 1 - 274942287 + NaN 4153.324576 - 276320137 + 永和大王 1830.866492 - 276320142 + Footmassage 富橋 1824.330899 - 286246121 + NaN 10061.545473 - 286251975 - 9904.207906 - 290600874 + 迷你站奶茶专门店 1074.850689 - 297407376 - 1995.406736 - 297407444 + 永和大王 + 991.312815 + 招商银行 1877.792905 @@ -994,21 +1228,19 @@ Executing the function 116.319728 2008-10-23 05:53:43 1 - 274942287 + NaN 4154.833968 - 276320137 + 永和大王 1827.989263 - 276320142 + Footmassage 富橋 1821.460600 - 286246121 + NaN 10062.044871 - 286251975 - 9904.433434 - 290600874 + 迷你站奶茶专门店 1078.957681 - 297407376 - 1992.786980 - 297407444 + 永和大王 + 995.702764 + 招商银行 1875.015873 @@ -1031,13 +1263,6 @@ to simulate an operation. 
indexOfPois = np.arange(0, POIs.shape[0], POIs.shape[0]/20, dtype=np.int64) POIs_events = POIs.iloc[indexOfPois].copy() - -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. code:: ipython3 randomIndexOfMoveDf = np.arange(0, move_df.shape[0], move_df.shape[0]/20, dtype=np.int64) @@ -1053,7 +1278,7 @@ to simulate an operation. .. code:: ipython3 - it.join_with_poi_datetime( + df_7 = it.join_with_events( df_7, POIs_events, label_date='datetime', time_window=900, label_event_id='osmid', label_event_type='amenity' @@ -1063,7 +1288,7 @@ to simulate an operation. .. parsed-literal:: - VBox(children=(HTML(value=''), IntProgress(value=0, max=5000))) + VBox(children=(HTML(value=''), IntProgress(value=0, max=20))) .. code:: ipython3 @@ -1159,128 +1384,7 @@ to simulate an operation. -8. Optimized Integrating events (points of interest with timestamp) to the DataSet ----------------------------------------------------------------------------------- - -.. code:: ipython3 - - df_8 = move_df.copy() - POIs_events["event_id"] = POIs_events["osmid"] - POIs_events["event_type"] = POIs_events["amenity"] - it.join_with_poi_datetime_optimizer(df_8, POIs_events) - - -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - - -.. 
parsed-literal:: - - VBox(children=(HTML(value=''), IntProgress(value=0, max=20))) - - -.. parsed-literal:: - - <__array_function__ internals>:5: DeprecationWarning: Calling nonzero on 0d arrays is deprecated, as it behaves surprisingly. Use `atleast_1d(cond).nonzero()` if the old behavior was intended. If the context of this warning is of the form `arr[nonzero(cond)]`, just use `arr[cond]`. - - -.. code:: ipython3 - - df_8.head() - - - - -.. raw:: html - -
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    latlondatetimeidevent_iddist_eventevent_type
    039.984094116.3192362008-10-23 05:53:051269492188118.990536toilets
    139.984198116.3193222008-10-23 05:53:061269492188120.895985toilets
    239.984224116.3194022008-10-23 05:53:111269492188118.147609toilets
    339.984211116.3193892008-10-23 05:53:161269492188117.863168toilets
    439.984217116.3194222008-10-23 05:53:211269492188116.416939toilets
    -
    - - - -9. Integration with Point of Interest HOME +8. Integration with Point of Interest HOME ------------------------------------------ The Home type contains, in addition to latitude, longitude and id, the @@ -1290,23 +1394,16 @@ Creating a home point .. code:: ipython3 - df_9 = move_df.copy() - home_df = df_9.iloc[300:302].copy() + df_8 = move_df.copy() + home_df = df_8.iloc[300:302].copy() home_df['formatted_address'] = ['Rua1, n02', 'Rua2, n03'] home_df['city'] = ['ChinaTown', 'ChinaTown'] - -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - Using the function .. code:: ipython3 - it.join_with_home_by_id(df_9, home_df, label_id='id') + df_8 = it.join_with_home_by_id(df_8, home_df, label_id='id') @@ -1317,13 +1414,7 @@ Using the function .. code:: ipython3 - df_9.head() - - -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) + df_8.head() @@ -1414,43 +1505,31 @@ Using the function -10. Merge of HOME with DataSet already integrated with POIs ------------------------------------------------------------ +9. Merge of HOME with DataSet already integrated with POIs +---------------------------------------------------------- Integration .. code:: ipython3 - it.join_with_pois(df_9, POIs, label_id='osmid', label_poi_name='name') - - -.. 
parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) + df_9 = it.join_with_pois(df_8, POIs, label_id='osmid', label_poi_name='name') .. parsed-literal:: - VBox(children=(HTML(value=''), IntProgress(value=0, max=5000))) + VBox(children=(HTML(value=''), IntProgress(value=0, max=746))) .. code:: ipython3 - it.merge_home_with_poi(df_9) + df_9 = it.merge_home_with_poi(df_9) .. code:: ipython3 df_9.head() -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. raw:: html @@ -1570,12 +1649,6 @@ Converts POIs of the types “bank_filials”, “bank_agencies”, banks_pois.head() -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. raw:: html @@ -1655,19 +1728,13 @@ Converts POIs of the types “bank_filials”, “bank_agencies”, .. code:: ipython3 #Join with POIs - it.join_with_pois(df_banks, banks_pois, label_id='id', label_poi_name='type_poi') - - -.. 
parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) + df_banks = it.join_with_pois(df_banks, banks_pois, label_id='id', label_poi_name='type_poi') .. parsed-literal:: - VBox(children=(HTML(value=''), IntProgress(value=0, max=5000))) + VBox(children=(HTML(value=''), IntProgress(value=0, max=5))) .. code:: ipython3 @@ -1841,16 +1908,10 @@ Converts POIs of the types “bank_filials”, “bank_agencies”, bank: 1238 -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. code:: ipython3 #Finally, the Union - it.union_poi_bank(df_banks, label_poi="name_poi") + df_banks = it.union_poi_bank(df_banks, label_poi="name_poi") #Result df_banks.head() @@ -1950,12 +2011,6 @@ Converts POIs of the types “bank_filials”, “bank_agencies”, df_banks.loc[df_banks['name_poi'] == 'banks'].shape[0] -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. parsed-literal:: @@ -2064,19 +2119,13 @@ Converts “transit_station” and “bus_points” POIs to a single type: .. 
code:: ipython3 #Integration - it.join_with_pois(df_bus, bus_pois, label_id='id', label_poi_name='name_poi') - - -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) + df_bus = it.join_with_pois(df_bus, bus_pois, label_id='id', label_poi_name='name_poi') .. parsed-literal:: - VBox(children=(HTML(value=''), IntProgress(value=0, max=5000))) + VBox(children=(HTML(value=''), IntProgress(value=0, max=5))) .. code:: ipython3 @@ -2188,16 +2237,10 @@ Converts “transit_station” and “bus_points” POIs to a single type: Number of points close to pontos_de_onibus's: 2154 -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. code:: ipython3 #The union function - it.union_poi_bus_station(df_bus, label_poi="name_poi") + df_bus = it.union_poi_bus_station(df_bus, label_poi="name_poi") df_bus.head() @@ -2297,12 +2340,6 @@ Converts “transit_station” and “bus_points” POIs to a single type: df_bus.loc[df_bus['name_poi'] == 'bus_station'].shape[0] -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. 
Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. parsed-literal:: @@ -2400,19 +2437,13 @@ Converts “bar” and “restaurant” POIs to a single type: “bar-restaurant .. code:: ipython3 #Integration - it.join_with_pois(df_bar, br_POIs, label_id='id', label_poi_name='name_poi') - - -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) + df_bar = it.join_with_pois(df_bar, br_POIs, label_id='id', label_poi_name='name_poi') .. parsed-literal:: - VBox(children=(HTML(value=''), IntProgress(value=0, max=5000))) + VBox(children=(HTML(value=''), IntProgress(value=0, max=4))) .. code:: ipython3 @@ -2525,16 +2556,10 @@ Converts “bar” and “restaurant” POIs to a single type: “bar-restaurant Closest type points 'restaurant': 2461 -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. code:: ipython3 #Union of the two types of POIs into a single - it.union_poi_bar_restaurant(df_bar, label_poi="name_poi") + df_bar = it.union_poi_bar_restaurant(df_bar, label_poi="name_poi") #Result df_bar.head() @@ -2634,12 +2659,6 @@ Converts “bar” and “restaurant” POIs to a single type: “bar-restaurant df_bar.loc[df_bar['name_poi'] == 'bar-restaurant'].shape[0] -.. 
parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. parsed-literal:: @@ -2737,22 +2756,16 @@ Converts “pracas_e_parques” and “park” POIs to a single type: “parks .. code:: ipython3 #Integration - it.join_with_pois(df_parks, p_POIs, label_id='id', label_poi_name='name_poi') + df_parks = it.join_with_pois(df_parks, p_POIs, label_id='id', label_poi_name='name_poi') #Result df_parks.head() -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. parsed-literal:: - VBox(children=(HTML(value=''), IntProgress(value=0, max=5000))) + VBox(children=(HTML(value=''), IntProgress(value=0, max=4))) @@ -2859,16 +2872,10 @@ Converts “pracas_e_parques” and “park” POIs to a single type: “parks Number of points closest to park: 2284 -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. 
code:: ipython3 #Union function - it.union_poi_parks(df_parks, label_poi="name_poi") + df_parks = it.union_poi_parks(df_parks, label_poi="name_poi") df_parks.head() @@ -2967,12 +2974,6 @@ Converts “pracas_e_parques” and “park” POIs to a single type: “parks df_parks.loc[df_parks['name_poi'] == 'parks'].shape[0] -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. parsed-literal:: @@ -3068,21 +3069,15 @@ Union of police points .. code:: ipython3 #Integration - it.join_with_pois(df_police, pol_POIs, label_id='id', label_poi_name='name_poi') + df_police = it.join_with_pois(df_police, pol_POIs, label_id='id', label_poi_name='name_poi') df_police.head() -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. parsed-literal:: - VBox(children=(HTML(value=''), IntProgress(value=0, max=5000))) + VBox(children=(HTML(value=''), IntProgress(value=0, max=4))) @@ -3186,16 +3181,10 @@ Union of police points Number of points closest to distritos_policiais: 3420 -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. 
Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. code:: ipython3 #Union funcion - it.union_poi_police(df_police, label_poi="name_poi") + df_police = it.union_poi_police(df_police, label_poi="name_poi") .. code:: ipython3 @@ -3297,12 +3286,6 @@ Union of police points df_police.loc[df_police['name_poi'] == 'police'].shape[0] -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. parsed-literal:: @@ -3406,12 +3389,6 @@ Union of police points area_c -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - .. raw:: html @@ -3483,13 +3460,7 @@ Union of police points .. code:: ipython3 #Integration - it.join_collective_areas(gdf, area_c) - - -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. 
- and should_run_async(code) + gdf = it.join_collective_areas(gdf, area_c) @@ -3582,48 +3553,3 @@ Union of police points - - - -Viewing points on the map -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Collective Area points - -.. code:: ipython3 - - folium.plot_markers(area_c) - - -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - - - -.. raw:: html - -
    Make this Notebook Trusted to load map: File -> Trust Notebook
    - - - -Corresponding points - -.. code:: ipython3 - - folium.plot_markers(gdf.loc[gdf['violating'] == True]) - - -.. parsed-literal:: - - /home/flycher/anaconda3/envs/pmv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. - and should_run_async(code) - - - - -.. raw:: html - -
    Make this Notebook Trusted to load map: File -> Trust Notebook
    diff --git a/docs/examples/07_Exploring_Query.rst b/docs/examples/07_Exploring_Query.rst index 902007b3..c6dfeacf 100644 --- a/docs/examples/07_Exploring_Query.rst +++ b/docs/examples/07_Exploring_Query.rst @@ -15,7 +15,7 @@ ------------ DataSet - `Hurricanes and -Typhoons `__:he NHC +Typhoons `__: The NHC publishes the tropical cyclone historical database in a format known as HURDAT, short for HURricane DATabase @@ -729,14 +729,14 @@ Visualization .. code:: ipython3 - folium.plot_trajectories_with_folium(hurricanes_2012, zoom_start=2) + folium.plot_trajectories(hurricanes_2012, zoom_start=2) .. raw:: html -
    Make this Notebook Trusted to load map: File -> Trust Notebook
    +
    Make this Notebook Trusted to load map: File -> Trust Notebook
    @@ -758,11 +758,21 @@ Visualization .. code:: ipython3 #Selecting a hurricane for demonstration - gonzalo = hurricanes_2012.loc[hurricanes_2012['id'] == ' GONZALO'] + gonzalo = hurricanes_2012.loc[hurricanes_2012['id'].str.strip() == 'GONZALO'] + gonzalo.shape + + + + +.. parsed-literal:: + + (39, 23) + + .. code:: ipython3 - folium.plot_trajectories_with_folium( + folium.plot_trajectories( gonzalo, lat_origin=gonzalo['lat'].median(), lon_origin=gonzalo['lon'].median(), zoom_start=2 ) @@ -771,7 +781,7 @@ Visualization .. raw:: html -
    Make this Notebook Trusted to load map: File -> Trust Notebook
    +
    Make this Notebook Trusted to load map: File -> Trust Notebook
    @@ -794,14 +804,14 @@ Using distance MEDP (Mean Euclidean Distance Predictive) .. code:: ipython3 - folium.plot_trajectories_with_folium(prox_Gonzalo, zoom_start=3) + folium.plot_trajectories(prox_Gonzalo, zoom_start=3) .. raw:: html -
    Make this Notebook Trusted to load map: File -> Trust Notebook
    +
    Make this Notebook Trusted to load map: File -> Trust Notebook
    @@ -821,14 +831,14 @@ Using Distance MEDT (Mean Euclidean Distance Trajectory) .. code:: ipython3 - folium.plot_trajectories_with_folium(prox_Gonzalo, zoom_start=3) + folium.plot_trajectories(prox_Gonzalo, zoom_start=3) .. raw:: html -
    Make this Notebook Trusted to load map: File -> Trust Notebook
    +
    Make this Notebook Trusted to load map: File -> Trust Notebook
    @@ -851,14 +861,14 @@ Using distance MEDP (Mean Euclidean Distance Predictive) .. code:: ipython3 - folium.plot_trajectories_with_folium(prox_Gonzalo, zoom_start=3) + folium.plot_trajectories(prox_Gonzalo, zoom_start=3) .. raw:: html -
    Make this Notebook Trusted to load map: File -> Trust Notebook
    +
    Make this Notebook Trusted to load map: File -> Trust Notebook
    @@ -878,11 +888,11 @@ Using Distance MEDT (Mean Euclidean Distance Trajectory) .. code:: ipython3 - folium.plot_trajectories_with_folium(prox_Gonzalo, zoom_start=3) + folium.plot_trajectories(prox_Gonzalo, zoom_start=3) .. raw:: html -
    Make this Notebook Trusted to load map: File -> Trust Notebook
    +
    Make this Notebook Trusted to load map: File -> Trust Notebook
    From b66371289d0f795605a9e01adf4af3273c420de2 Mon Sep 17 00:00:00 2001 From: flych3r Date: Tue, 13 Jul 2021 21:01:36 -0300 Subject: [PATCH 56/56] =?UTF-8?q?Bump=20version:=202.7.2=20=E2=86=92=203.0?= =?UTF-8?q?.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pymove/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pymove/__init__.py b/pymove/__init__.py index a22657ba..32bdb6a9 100644 --- a/pymove/__init__.py +++ b/pymove/__init__.py @@ -33,4 +33,4 @@ from .utils.trajectories import read_csv from .visualization import folium, matplotlib -__version__ = '2.7.2' +__version__ = '3.0.0' diff --git a/setup.cfg b/setup.cfg index e38f6dae..648286f1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2.7.2 +current_version = 3.0.0 tag_name = {new_version} commit = True diff --git a/setup.py b/setup.py index 4f2cd6b0..afbd4b53 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( name='pymove', - version='2.7.2', + version='3.0.0', author='Insight Data Science Lab', author_email='insightlab@dc.ufc.br', license='MIT',