In [1]:
from pymove import MoveDataFrame
import pymove
import pandas as pd
from numpy.testing import assert_array_equal
from pymove import read_csv

In [2]:
def _has_columns(data):
    """
    Checks whether the received dataset has 'lat', 'lon', 'datetime' columns.

    Parameters
    ----------
    data : dict, list, numpy array or pandas.core.DataFrame.
        Input trajectory data.

    Returns
    -------
    bool
        Represents whether or not you have the required columns.

    """
    if 'lat' in data and 'lon' in data and 'datetime' in data:
        return True
    return False

In [3]:
def _validate_moveDataFrame_data(data):
    try:
        if data.dtypes.lat != 'float32':
            return False
        if data.dtypes.lon != 'float32':
            return False
        if data.dtypes.datetime != 'datetime64[ns]':
            return False
        return True
    except AttributeError:
        print(AttributeError)

In [2]:
list_data = [[39.984094, 116.319236, '2008-10-23 05:53:05', 1],
             [39.984198, 116.319322, '2008-10-23 05:53:06', 1],
             [39.984224, 116.319402, '2008-10-23 05:53:11', 2], 
             [39.984224, 116.319402, '2008-10-23 05:53:11', 2]]
move_df = MoveDataFrame(data=list_data, latitude="lat", longitude="lon", datetime="datetime", traj_id="id")

In [5]:
def test_MoveDataFrame_from_list():
    list_data = [[39.984094, 116.319236, '2008-10-23 05:53:05', 1],
             [39.984198, 116.319322, '2008-10-23 05:53:06', 1],
             [39.984224, 116.319402, '2008-10-23 05:53:11', 1], 
             [39.984224, 116.319402, '2008-10-23 05:53:11', 1]]
    move_df = MoveDataFrame(data=list_data, latitude="lat", longitude="lon", datetime="datetime", traj_id="id")
    assert _has_columns(move_df)
    assert _validate_moveDataFrame_data(move_df)

In [6]:
test_MoveDataFrame_from_list()

In [7]:
def test_MoveDataFrame_from_dict():
    dict_data = {
        'lat': [39.984198, 39.984224, 39.984094],
        'lon': [116.319402, 116.319322, 116.319402],
        'datetime': ['2008-10-23 05:53:11', '2008-10-23 05:53:06', '2008-10-23 05:53:06']
    }
    move_df = MoveDataFrame(data=dict_data, latitude="lat", longitude="lon", datetime="datetime", traj_id="id")
    assert _has_columns(move_df)
    assert _validate_moveDataFrame_data(move_df)

In [8]:
test_MoveDataFrame_from_dict()

In [9]:
def test_MoveDataFrame_from_DataFrame():
    import pandas as pd

    df = pd.read_csv('examples/geolife_sample.csv', parse_dates=['datetime'])
    move_df = MoveDataFrame(data=df, latitude="latp", longitude="lon", datetime="datetime")
    assert _has_columns(move_df)
    assert _validate_moveDataFrame_data(move_df) 

In [10]:
test_MoveDataFrame_from_DataFrame()

In [11]:
def test_MoveDataFrame_from_file():
    import pymove
    move_df = read_csv('examples/geolife_sample.csv')
    assert _has_columns(move_df)
    assert _validate_moveDataFrame_data(move_df)

In [12]:
test_MoveDataFrame_from_file()

In [13]:
def test_attributeError_intiatiate_MoveDataFrame_from_DataFrame():
    import pandas as pd
    df = pd.read_csv('examples/geolife_sample.csv', parse_dates=['datetime'])
    error_msg = "Could not instantiate new MoveDataFrame because data has missing columns"

    try:
        MoveDataFrame(data=df, latitude="latp", longitude="lon", datetime="datetime")
        raise AssertionError("AttributeError error not raised by MoveDataFrame")            
    except AttributeError as e:
        pass

In [14]:
from pymove import MoveDataFrame

In [15]:
move_df.head()

Unnamed: 0,lat,lon,datetime,id
0,39.984093,116.319237,2008-10-23 05:53:05,1
1,39.9842,116.319321,2008-10-23 05:53:06,1
2,39.984222,116.319405,2008-10-23 05:53:11,2
3,39.984222,116.319405,2008-10-23 05:53:11,2


In [16]:
def test_number_users():
    assert move_df.get_users_number() == 1
    move_df['user_id'] = [1, 1, 2, 3]
    assert move_df.get_users_number() == 3  
    move_df.drop('user_id', axis=1, inplace = True)    

In [17]:
test_number_users()

In [18]:
def test_to_numpy():
    import numpy
    move_numpy = move_df.to_numpy()
    assert type(move_numpy) is numpy.ndarray 

In [19]:
test_to_numpy()

In [20]:
def test_to_dict():
    move_dict = move_df.to_dict()
    assert type(move_df.to_dict()) is dict

In [21]:
test_to_dict()

In [22]:
def test_to_grid():
    import pymove
    assert type(move_df.to_grid(8)) is pymove.core.grid.Grid

In [23]:
test_to_grid()


Creating a virtual grid without polygons
...cell size by degree: 7.218478943256657e-05
...grid_size_lat_y:2
grid_size_lon_x:3

..A virtual grid was created


In [24]:
def test_to_dataFrame():
    import pandas
    assert type(move_df.to_DataFrame()) is pandas.DataFrame

In [25]:
test_to_dataFrame()

In [26]:
def test_generate_tid_based_on_id_datatime():
    
    new_move_df = move_df.generate_tid_based_on_id_datatime(inplace = False)
    assert_array_equal(new_move_df['tid'], ['12008102305', '12008102305', '22008102305', '22008102305'])
    assert type(new_move_df) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'tid' not in move_df
    
    move_df.generate_tid_based_on_id_datatime()
    assert_array_equal(move_df['tid'], ['12008102305', '12008102305', '22008102305', '22008102305'])
    assert type(move_df) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'tid' in move_df
    
    move_df.drop('tid', axis=1, inplace = True)

In [27]:
test_generate_tid_based_on_id_datatime()


Creating or updating tid feature...

...Sorting by id and datetime to increase performance


...tid feature was created...


Creating or updating tid feature...

...Sorting by id and datetime to increase performance


...tid feature was created...



In [28]:
def test_generate_date_features():
    
    new_move_df = move_df.generate_date_features(inplace = False)
    assert_array_equal(new_move_df['date'].astype(str),['2008-10-23', '2008-10-23', '2008-10-23', '2008-10-23'])
    assert type(new_move_df) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'date' not in move_df
    
    move_df.generate_date_features()
    assert_array_equal(move_df['date'].astype(str), ['2008-10-23', '2008-10-23', '2008-10-23', '2008-10-23'])
    assert type(move_df) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'date' in move_df
    
    move_df.drop('date', axis=1, inplace = True)    

In [29]:
test_generate_date_features()

Creating date features...
..Date features was created...

Creating date features...
..Date features was created...



In [30]:
def test_generate_hour_features():
   
    new_move_df = move_df.generate_hour_features(inplace = False)
    assert new_move_df['hour'].tolist() == [5, 5, 5, 5]
    assert type(new_move_df) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'hour' not in move_df
    
    move_df.generate_hour_features()
    assert move_df['hour'].tolist() == [5, 5, 5, 5]
    assert type(move_df) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'hour' in move_df
    
    move_df.drop('hour', axis=1, inplace = True)     

In [31]:
test_generate_hour_features()


Creating or updating a feature for hour...

...Hour feature was created...


Creating or updating a feature for hour...

...Hour feature was created...



In [32]:
def test_generate_day_of_the_week_features():
    
    new_move_df = move_df.generate_day_of_the_week_features(inplace = False)
    assert_array_equal(new_move_df['day'], ['Thursday', 'Thursday', 'Thursday', 'Thursday'])
    assert type(new_move_df) is pymove.core.dataframe.PandasMoveDataFrame
    print(move_df)
    assert 'day' not in move_df
    
    move_df.generate_day_of_the_week_features()
    assert_array_equal(move_df['day'].tolist(), ['Thursday', 'Thursday', 'Thursday', 'Thursday'])
    assert type(move_df) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'day' in move_df
    move_df.drop('day', axis=1, inplace = True) 

In [33]:
test_generate_day_of_the_week_features()


Creating or updating day of the week feature...

...the day of the week feature was created...

         lat         lon            datetime  id
0  39.984093  116.319237 2008-10-23 05:53:05   1
1  39.984200  116.319321 2008-10-23 05:53:06   1
2  39.984222  116.319405 2008-10-23 05:53:11   2
3  39.984222  116.319405 2008-10-23 05:53:11   2

Creating or updating day of the week feature...

...the day of the week feature was created...



In [34]:
def test_generate_weekend_features():
    new_move_df = move_df.generate_weekend_features(inplace = False)
    assert_array_equal(new_move_df['weekend'], [0, 0, 0, 0])
    assert type(new_move_df) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'weekend' not in move_df
    
    move_df.generate_weekend_features()
    assert_array_equal(move_df['weekend'], [0, 0, 0, 0])
    assert type(move_df) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'weekend' in move_df
    move_df.drop('weekend', axis=1, inplace = True) 

In [35]:
test_generate_weekend_features()


Creating or updating day of the week feature...

...the day of the week feature was created...

Creating or updating a feature for weekend

...Weekend was set as 1 or 0...

...dropping colum day


Creating or updating day of the week feature...

...the day of the week feature was created...

Creating or updating a feature for weekend

...Weekend was set as 1 or 0...

...dropping colum day



In [36]:
def test_generate_time_of_day_features():
    new_move_df = move_df.generate_time_of_day_features(inplace = False)
    assert_array_equal(new_move_df['period'],['Early morning','Early morning','Early morning','Early morning'])
    assert type(new_move_df) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'period' not in move_df

    move_df.generate_time_of_day_features()
    assert_array_equal(move_df['period'], ['Early morning','Early morning','Early morning','Early morning'])
    assert type(move_df) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'period' in move_df
    move_df.drop('period', axis=1, inplace = True) 

In [37]:
test_generate_time_of_day_features()


Creating or updating period feature
...Early morning from 0H to 6H
...Morning from 6H to 12H
...Afternoon from 12H to 18H
...Evening from 18H to 24H

...the period of day feature was created

Creating or updating period feature
...Early morning from 0H to 6H
...Morning from 6H to 12H
...Afternoon from 12H to 18H
...Evening from 18H to 24H

...the period of day feature was created


In [38]:
def test_generate_datetime_in_format_cyclical():
    new_move_df = move_df.generate_datetime_in_format_cyclical(inplace = False)
    assert_array_equal(new_move_df['hour_sin'],[0.9790840876823229, 0.9790840876823229,0.9790840876823229, 0.9790840876823229])
    assert_array_equal(new_move_df['hour_cos'], [0.20345601305263375,0.20345601305263375,0.20345601305263375,0.20345601305263375])
    assert type(new_move_df) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'hour_sin' not in move_df
    assert 'hour_cos' not in move_df

    move_df.generate_datetime_in_format_cyclical()
    assert_array_equal(move_df['hour_sin'], [0.9790840876823229, 0.9790840876823229,0.9790840876823229, 0.9790840876823229])
    assert_array_equal(move_df['hour_cos'], [0.20345601305263375,0.20345601305263375,0.20345601305263375,0.20345601305263375])
    assert type(move_df) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'hour_sin' in move_df
    assert 'hour_cos' in move_df
    move_df.drop('hour_sin', axis=1, inplace = True) 
    move_df.drop('hour_cos', axis=1, inplace = True) 

In [39]:
test_generate_datetime_in_format_cyclical()

Encoding cyclical continuous features - 24-hour time
...hour_sin and  hour_cos features were created...

Encoding cyclical continuous features - 24-hour time
...hour_sin and  hour_cos features were created...



In [40]:
def test_generate_dist_features():
    import pandas as pd
    df = pd.read_csv('examples/geolife_sample.csv', parse_dates=['datetime'], nrows=5)
    df_move = MoveDataFrame(data=df, latitude="latp", longitude="lon", datetime="datetime")
    new_df_move = df_move.generate_dist_features(inplace =False)
    assert_array_equal(new_df_move['dist_to_prev'].astype(str), ['nan', '14.015318782639952', '7.345483960534693', '1.6286216204832726', '2.4484945931533275'])
    assert_array_equal(new_df_move['dist_to_next'].astype(str), ['14.015318782639952', '7.345483960534693','1.6286216204832726','2.4484945931533275','nan'])
    assert_array_equal(new_df_move['dist_prev_to_next'].astype(str), ['nan', '20.082061827224607', '5.929779944096936', '1.2242472060393084', 'nan'])
    assert type(new_df_move) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'dist_to_prev' not in df_move
    assert 'dist_to_next' not in df_move
    assert 'dist_prev_to_next' not in df_move
    
    df_move.generate_dist_features()
    assert_array_equal(df_move['dist_to_prev'].astype(str), ['nan', '14.015318782639952', '7.345483960534693', '1.6286216204832726', '2.4484945931533275'])
    assert_array_equal(df_move['dist_to_next'].astype(str), ['14.015318782639952', '7.345483960534693','1.6286216204832726','2.4484945931533275','nan'])
    assert_array_equal(df_move['dist_prev_to_next'].astype(str), ['nan', '20.082061827224607', '5.929779944096936', '1.2242472060393084', 'nan'])
    assert type(df_move) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'dist_to_prev' in df_move
    assert 'dist_to_next' in df_move
    assert 'dist_prev_to_next' in df_move

In [41]:
test_generate_dist_features()


Creating or updating distance features in meters...

...Sorting by id and datetime to increase performance

...Set id as index to increase attribution performance

(5/5) 100% in 00:00:00.002 - estimated end in 00:00:00.000
...Reset index

..Total Time: 0.004985332489013672

Creating or updating distance features in meters...

...Sorting by id and datetime to increase performance

...Set id as index to increase attribution performance

(5/5) 100% in 00:00:00.000 - estimated end in 00:00:00.000
...Reset index

..Total Time: 0.0039899349212646484


In [42]:
def test_generate_dist_time_speed_features():
    import pandas as pd
    df = pd.read_csv('examples/geolife_sample.csv', parse_dates=['datetime'], nrows=5)
    df_move = MoveDataFrame(data=df, latitude="latp", longitude="lon", datetime="datetime")
    new_df_move = df_move.generate_dist_time_speed_features(inplace =False)
    assert_array_equal(new_df_move['dist_to_prev'].astype(str), ['nan', '14.015318782639952', '7.345483960534693', '1.6286216204832726', '2.4484945931533275'])
    assert_array_equal(new_df_move['time_to_prev'].astype(str), ['nan', '1.0', '5.0', '5.0', '5.0'])
    assert_array_equal(new_df_move['speed_to_prev'].astype(str), ['nan', '14.015318782639952', '1.4690967921069387', '0.3257243240966545', '0.4896989186306655'])
    assert type(new_df_move) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'dist_to_prev' not in df_move
    assert 'time_to_prev' not in df_move
    assert 'speed_to_prev' not in df_move
    
    df_move.generate_dist_time_speed_features()
    assert_array_equal(df_move['dist_to_prev'].astype(str), ['nan', '14.015318782639952', '7.345483960534693', '1.6286216204832726', '2.4484945931533275'])
    assert_array_equal(df_move['time_to_prev'].astype(str), ['nan', '1.0', '5.0', '5.0', '5.0'])
    assert_array_equal(df_move['speed_to_prev'].astype(str), ['nan', '14.015318782639952', '1.4690967921069387', '0.3257243240966545', '0.4896989186306655'])
    assert type(df_move) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'dist_to_prev' in df_move
    assert 'time_to_prev' in df_move
    assert 'speed_to_prev' in df_move

In [43]:
test_generate_dist_time_speed_features()


Creating or updating distance, time and speed features in meters by seconds

...Sorting by id and datetime to increase performance

...Set id as index to a higher peformance

(5/5) 100% in 00:00:00.008 - estimated end in 00:00:00.000
...Reset index...

..Total Time: 0.014

Creating or updating distance, time and speed features in meters by seconds

...Sorting by id and datetime to increase performance

...Set id as index to a higher peformance

(5/5) 100% in 00:00:00.009 - estimated end in 00:00:00.000
...Reset index...

..Total Time: 0.013


In [44]:
def test_generate_move_and_stop_by_radius():
    df = pd.read_csv('examples/geolife_sample.csv', parse_dates=['datetime'], nrows=5)
    df_move = MoveDataFrame(data=df, latitude="latp", longitude="lon", datetime="datetime") 
    new_df_move = df_move.generate_move_and_stop_by_radius(inplace =False)

    assert_array_equal(new_df_move['situation'].astype(str),['nan', 'move', 'move', 'move', 'move'])
    assert type(new_df_move) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'situation' not in df_move

    df_move.generate_move_and_stop_by_radius()
    assert_array_equal(df_move['situation'].astype(str),['nan', 'move', 'move', 'move', 'move'])
    assert type(move_df) is pymove.core.dataframe.PandasMoveDataFrame
    assert 'situation' in df_move

In [45]:
test_generate_move_and_stop_by_radius()


Creating or updating distance features in meters...

...Sorting by id and datetime to increase performance

...Set id as index to increase attribution performance

(5/5) 100% in 00:00:00.001 - estimated end in 00:00:00.000
...Reset index

..Total Time: 0.004985809326171875

Creating or updating features MOVE and STOPS...


....There are 0 stops to this parameters


Creating or updating distance features in meters...

...Sorting by id and datetime to increase performance

...Set id as index to increase attribution performance

(5/5) 100% in 00:00:00.000 - estimated end in 00:00:00.000
...Reset index

..Total Time: 0.003989696502685547

Creating or updating features MOVE and STOPS...


....There are 0 stops to this parameters



In [3]:
move_df

Unnamed: 0,lat,lon,datetime,id
0,39.984093,116.319237,2008-10-23 05:53:05,1
1,39.9842,116.319321,2008-10-23 05:53:06,1
2,39.984222,116.319405,2008-10-23 05:53:11,2
3,39.984222,116.319405,2008-10-23 05:53:11,2


In [5]:
move_df.shift(periods=3)

AttributeError: 'DataFrame' object has no attribute 'consolidate'

In [6]:
move_df.plot()

ValueError: view limit minimum -36665.862263310184 is less than 1 and is an invalid Matplotlib date value. This often happens if you pass a non-datetime value to an axis that has datetime units

Error in callback <function install_repl_displayhook.<locals>.post_execute at 0x000001B275A0AD90> (for post_execute):


ValueError: view limit minimum -36665.862263310184 is less than 1 and is an invalid Matplotlib date value. This often happens if you pass a non-datetime value to an axis that has datetime units

ValueError: view limit minimum -36665.862263310184 is less than 1 and is an invalid Matplotlib date value. This often happens if you pass a non-datetime value to an axis that has datetime units

In [7]:
move_df.dtypes

lat                float32
lon                float32
datetime    datetime64[ns]
id                   int64
dtype: object