# PyMEOS Demo for ACM Demo

In [2]:
import pandas as pd
import shapely as shp
from pymeos import *
from tqdm.auto import tqdm

tqdm.pandas()

In [3]:
pymeos_initialize()

## Preprocessing

In [9]:
raw = pd.read_csv('singapore_0.csv', header=0,
                  names=['ID', 'Trajectory ID', 'Driving Mode', 'OS Name', 'Timestamp', 'Latitude', 'Longitude',
                         'Speed', 'Bearing', 'Accuracy'],
                  usecols=['ID', 'Trajectory ID', 'Timestamp', 'Latitude', 'Longitude', 'Speed', 'Bearing', 'Accuracy'])
raw.head()

Unnamed: 0,ID,Trajectory ID,Timestamp,Latitude,Longitude,Speed,Bearing,Accuracy
0,0,70014,1554943236,1.342326,103.888969,18.91,248,3.9
1,1,73573,1555582623,1.321781,103.856366,17.719076,44,4.0
2,2,75567,1555141026,1.327088,103.861273,14.021548,34,3.9
3,3,1410,1555731693,1.262482,103.823794,13.026521,181,4.0
4,4,4354,1555584497,1.283799,103.80721,14.812943,93,3.9


We preprocess the row data so that:
- Rows that have null values are deleted.
- Timestamps are converted from millis since epoch to `datetime` objects.
- A new column with Shapely Points is generated.

In [10]:
pd.to_datetime(raw['Timestamp'], origin='unix', unit='s')

0         2019-04-11 00:40:36
1         2019-04-18 10:17:03
2         2019-04-13 07:37:06
3         2019-04-20 03:41:33
4         2019-04-18 10:48:17
                  ...        
3034548   2019-04-11 08:27:17
3034549   2019-04-14 13:25:52
3034550   2019-04-21 00:15:14
3034551   2019-04-19 14:58:11
3034552   2019-04-09 23:28:51
Name: Timestamp, Length: 3034553, dtype: datetime64[ns]

In [11]:
raw = raw.dropna()
raw['Timestamp'] = pd.to_datetime(raw['Timestamp'], origin='unix', unit='s')
raw['Location'] = shp.points(raw['Longitude'], raw['Latitude'])
raw.head()

Unnamed: 0,ID,Trajectory ID,Timestamp,Latitude,Longitude,Speed,Bearing,Accuracy,Location
0,0,70014,2019-04-11 00:40:36,1.342326,103.888969,18.91,248,3.9,POINT (103.888969 1.3423256)
1,1,73573,2019-04-18 10:17:03,1.321781,103.856366,17.719076,44,4.0,POINT (103.8563664 1.3217811)
2,2,75567,2019-04-13 07:37:06,1.327088,103.861273,14.021548,34,3.9,POINT (103.8612733 1.3270883)
3,3,1410,2019-04-20 03:41:33,1.262482,103.823794,13.026521,181,4.0,POINT (103.8237941 1.2624821)
4,4,4354,2019-04-18 10:48:17,1.283799,103.80721,14.812943,93,3.9,POINT (103.8072101 1.283799)


## PyMEOS objects generation
Now, we use the original dataset to build the PyMEOS temporal objects.

In [14]:
trajectories = raw[['Trajectory ID', 'Timestamp']].copy()

We start by generating an instant object (`TFloatInst` and `TGeomPointInst`) from the original data

In [15]:
trajectories['Speed'] = raw.progress_apply(lambda row: TFloatInst(value=row['Speed'], timestamp=row['Timestamp']),
                                           axis=1)
trajectories['Bearing'] = raw.progress_apply(lambda row: TFloatInst(value=row['Bearing'], timestamp=row['Timestamp']),
                                             axis=1)
trajectories['Location'] = raw.progress_apply(
    lambda row: TGeomPointInst(point=row['Location'], timestamp=row['Timestamp']), axis=1)
trajectories.head()

  0%|          | 0/3034553 [00:00<?, ?it/s]

  0%|          | 0/3034553 [00:00<?, ?it/s]

  0%|          | 0/3034553 [00:00<?, ?it/s]

Unnamed: 0,Trajectory ID,Timestamp,Speed,Bearing,Location
0,70014,2019-04-11 00:40:36,18@2019-04-11 00:40:36+02,248@2019-04-11 00:40:36+02,01010000001A8A3BDEE4F85940555689682A7AF53F@201...
1,73573,2019-04-18 10:17:03,17@2019-04-18 10:17:03+02,44@2019-04-18 10:17:03+02,01010000002B5904B5CEF65940D4884FF00326F53F@201...
2,75567,2019-04-13 07:37:06,14@2019-04-13 07:37:06+02,34@2019-04-13 07:37:06+02,0101000000C01A0C1A1FF75940C177F6F0C03BF53F@201...
3,1410,2019-04-20 03:41:33,13@2019-04-20 03:41:33+02,181@2019-04-20 03:41:33+02,0101000000D188E30AB9F459400191346E2033F43F@201...
4,4354,2019-04-18 10:48:17,14@2019-04-18 10:48:17+02,93@2019-04-18 10:48:17+02,01010000000F208D54A9F359403733FAD1708AF43F@201...


In [16]:
t = trajectories[['Trajectory ID', 'Timestamp']].copy()
t['Speed'] = trajectories['Speed'].map(lambda x: x.as_hexwkb())
t['Bearing'] = trajectories['Bearing'].map(lambda x: x.as_hexwkb())
t['Location'] = trajectories['Location'].map(lambda x: x.as_hexwkb())
t.to_csv('singapore_instants.csv', index=False)

In [4]:
trajectories = pd.read_csv('singapore_instants.csv', parse_dates=['Timestamp'], converters={
    'Speed': TFloatInst.from_hexwkb,
    'Bearing': TFloatInst.from_hexwkb,
    'Location': TGeomPointInst.from_hexwkb
})
trajectories.head()

Unnamed: 0.1,Unnamed: 0,Trajectory ID,Speed,Bearing,Location
0,0,70014,18@2019-04-11 00:40:36+02,248@2019-04-11 00:40:36+02,01010000001A8A3BDEE4F85940555689682A7AF53F@201...
1,1,73573,17@2019-04-18 10:17:03+02,44@2019-04-18 10:17:03+02,01010000002B5904B5CEF65940D4884FF00326F53F@201...
2,2,75567,14@2019-04-13 07:37:06+02,34@2019-04-13 07:37:06+02,0101000000C01A0C1A1FF75940C177F6F0C03BF53F@201...
3,3,1410,13@2019-04-20 03:41:33+02,181@2019-04-20 03:41:33+02,0101000000D188E30AB9F459400191346E2033F43F@201...
4,4,4354,14@2019-04-18 10:48:17+02,93@2019-04-18 10:48:17+02,01010000000F208D54A9F359403733FAD1708AF43F@201...


In [5]:
trajectories['Timestamp'] = trajectories['Speed'].map(lambda speed: speed.timestamp())
trajectories.head()

Unnamed: 0.1,Unnamed: 0,Trajectory ID,Speed,Bearing,Location,Timestamp
0,0,70014,18@2019-04-11 00:40:36+02,248@2019-04-11 00:40:36+02,01010000001A8A3BDEE4F85940555689682A7AF53F@201...,2019-04-11 00:40:36+02:00
1,1,73573,17@2019-04-18 10:17:03+02,44@2019-04-18 10:17:03+02,01010000002B5904B5CEF65940D4884FF00326F53F@201...,2019-04-18 10:17:03+02:00
2,2,75567,14@2019-04-13 07:37:06+02,34@2019-04-13 07:37:06+02,0101000000C01A0C1A1FF75940C177F6F0C03BF53F@201...,2019-04-13 07:37:06+02:00
3,3,1410,13@2019-04-20 03:41:33+02,181@2019-04-20 03:41:33+02,0101000000D188E30AB9F459400191346E2033F43F@201...,2019-04-20 03:41:33+02:00
4,4,4354,14@2019-04-18 10:48:17+02,93@2019-04-18 10:48:17+02,01010000000F208D54A9F359403733FAD1708AF43F@201...,2019-04-18 10:48:17+02:00


Now, we sort the rows by time, and aggregate them by Trajectory ID, aggregating the columns in lists

In [6]:
trajectories = trajectories.sort_values(by='Timestamp').groupby('Trajectory ID').agg(list)
trajectories.head()

Unnamed: 0_level_0,Unnamed: 0,Speed,Bearing,Location,Timestamp
Trajectory ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,"[1766918, 1766774, 1768732, 1759413, 1767261, ...","[25@2019-04-09 02:38:49+02, 25@2019-04-09 02:3...","[97@2019-04-09 02:38:49+02, 93@2019-04-09 02:3...",[0101000000D5C7E825EBF659408BC0FD254E53F43F@20...,"[2019-04-09 02:38:49+02:00, 2019-04-09 02:38:5..."
3,"[244964, 248831, 252494, 258537, 248572, 25208...","[5@2019-04-12 08:32:42+02, 8@2019-04-12 08:32:...","[106@2019-04-12 08:32:42+02, 122@2019-04-12 08...",[0101000000961A52047BF659400315C0F94285F43F@20...,"[2019-04-12 08:32:42+02:00, 2019-04-12 08:32:4..."
4,"[1134392, 1131321, 1139425, 1132423, 1132339, ...","[7@2019-04-13 10:42:46+02, 7@2019-04-13 10:42:...","[205@2019-04-13 10:42:46+02, 194@2019-04-13 10...",[0101000000BE8AE7C73AF75940BA72AC414EAEF53F@20...,"[2019-04-13 10:42:46+02:00, 2019-04-13 10:42:5..."
5,"[693513, 695204, 697258, 703039, 696074, 69914...","[2@2019-04-20 01:39:39+02, 10@2019-04-20 01:39...","[28@2019-04-20 01:39:39+02, 38@2019-04-20 01:3...",[0101000000699F86A7B9F159400992A5284274F53F@20...,"[2019-04-20 01:39:39+02:00, 2019-04-20 01:39:5..."
8,"[1902631, 1909677, 1894085, 1899668, 1909055, ...","[13@2019-04-11 07:24:36+02, 0@2019-04-11 07:25...","[319@2019-04-11 07:24:36+02, 0@2019-04-11 07:2...",[010100000095946016FFFC5940F36ED16E9958F53F@20...,"[2019-04-11 07:24:36+02:00, 2019-04-11 07:25:0..."


In [8]:
trajectories = trajectories.drop('Unnamed: 0', axis=1)
trajectories.head()

Unnamed: 0_level_0,Speed,Bearing,Location,Timestamp
Trajectory ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,"[25@2019-04-09 02:38:49+02, 25@2019-04-09 02:3...","[97@2019-04-09 02:38:49+02, 93@2019-04-09 02:3...",[0101000000D5C7E825EBF659408BC0FD254E53F43F@20...,"[2019-04-09 02:38:49+02:00, 2019-04-09 02:38:5..."
3,"[5@2019-04-12 08:32:42+02, 8@2019-04-12 08:32:...","[106@2019-04-12 08:32:42+02, 122@2019-04-12 08...",[0101000000961A52047BF659400315C0F94285F43F@20...,"[2019-04-12 08:32:42+02:00, 2019-04-12 08:32:4..."
4,"[7@2019-04-13 10:42:46+02, 7@2019-04-13 10:42:...","[205@2019-04-13 10:42:46+02, 194@2019-04-13 10...",[0101000000BE8AE7C73AF75940BA72AC414EAEF53F@20...,"[2019-04-13 10:42:46+02:00, 2019-04-13 10:42:5..."
5,"[2@2019-04-20 01:39:39+02, 10@2019-04-20 01:39...","[28@2019-04-20 01:39:39+02, 38@2019-04-20 01:3...",[0101000000699F86A7B9F159400992A5284274F53F@20...,"[2019-04-20 01:39:39+02:00, 2019-04-20 01:39:5..."
8,"[13@2019-04-11 07:24:36+02, 0@2019-04-11 07:25...","[319@2019-04-11 07:24:36+02, 0@2019-04-11 07:2...",[010100000095946016FFFC5940F36ED16E9958F53F@20...,"[2019-04-11 07:24:36+02:00, 2019-04-11 07:25:0..."


Finally, instants are merged into PyMEOS Sequences

In [9]:
trajectories['Speed'] = trajectories['Speed'].progress_apply(lambda x: TFloatSeq(instant_list=x, upper_inc=True))
trajectories['Bearing'] = trajectories['Bearing'].progress_apply(lambda x: TFloatSeq(instant_list=x, upper_inc=True))
trajectories['Trajectory'] = trajectories['Location'].progress_apply(lambda x: TGeomPointSeq(instant_list=x, upper_inc=True).set_srid(25832))
trajectories = trajectories.drop(['Location', 'Timestamp'], axis=1)

  0%|          | 0/28000 [00:00<?, ?it/s]

  0%|          | 0/28000 [00:00<?, ?it/s]

  0%|          | 0/28000 [00:00<?, ?it/s]

We store the DataFrame in a csv file to save the processing.
Sequence objects need to be transformed to text to be able to be parsed later.
HexWKB format is chosen here.

In [10]:
t = trajectories[['Speed', 'Bearing', 'Trajectory']].copy()
t['Speed'] = t['Speed'].map(lambda x: x.as_hexwkb())
t['Bearing'] = t['Bearing'].map(lambda x: x.as_hexwkb())
t['Trajectory'] = t['Trajectory'].map(lambda x: x.as_hexwkb())
t.to_csv('singapore_trajectories.csv')

In [12]:
trajectories = pd.read_csv('singapore_trajectories.csv', index_col='Trajectory ID', converters={
    'Speed': TFloatSeq.from_hexwkb,
    'Bearing': TFloatSeq.from_hexwkb,
    'Location': TGeomPointSeq.from_hexwkb
})
trajectories.head()

Unnamed: 0_level_0,Speed,Bearing,Trajectory
Trajectory ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,"[25@2019-04-09 02:38:49+02, 25@2019-04-09 02:3...","[97@2019-04-09 02:38:49+02, 93@2019-04-09 02:3...",0128000E6900000003D5C7E825EBF659408BC0FD254E53...
3,"[5@2019-04-12 08:32:42+02, 8@2019-04-12 08:32:...","[106@2019-04-12 08:32:42+02, 122@2019-04-12 08...",0128000E5900000003961A52047BF659400315C0F94285...
4,"[7@2019-04-13 10:42:46+02, 7@2019-04-13 10:42:...","[205@2019-04-13 10:42:46+02, 194@2019-04-13 10...",0128000E6600000003BE8AE7C73AF75940BA72AC414EAE...
5,"[2@2019-04-20 01:39:39+02, 10@2019-04-20 01:39...","[28@2019-04-20 01:39:39+02, 38@2019-04-20 01:3...",0128000E7400000003699F86A7B9F159400992A5284274...
8,"[13@2019-04-11 07:24:36+02, 0@2019-04-11 07:25...","[319@2019-04-11 07:24:36+02, 0@2019-04-11 07:2...",0128000E5F0000000395946016FFFC5940F36ED16E9958...
