# #06 - Exploring Integrations

## 0. Required library installations

For the execution of one of the integration functions that will be presented here, the geopandas library needs to be installed. To obtain some data for demonstrating the functions, the omnsx library also needs to be installed

```
conda install geopandas osmnx
```

## 1. Imports

In [1]:
import pymove
from pymove.utils import integration as it
from pymove.visualization import folium
from pymove import MoveDataFrame
import numpy as np
import pandas as pd
import geopandas
import osmnx

## 2. Load Data

In [2]:
move_df = pymove.read_csv('geolife_sample.csv', nrows=5000)

move_df.head()

Unnamed: 0,lat,lon,datetime,id
0,39.984094,116.319236,2008-10-23 05:53:05,1
1,39.984198,116.319322,2008-10-23 05:53:06,1
2,39.984224,116.319402,2008-10-23 05:53:11,1
3,39.984211,116.319389,2008-10-23 05:53:16,1
4,39.984217,116.319422,2008-10-23 05:53:21,1


#### Size

In [3]:
#Tamanho
move_df.shape[0]

5000

#### Visualization

In [4]:
folium.plot_trajectories_with_folium(move_df)

## 3. Loading points of interest

In [5]:
import osmnx as ox

place = 'Beijing, China'
tags = {'amenity':True}
POIs = ox.pois_from_place(place=place, tags=tags, which_result=2)

In [6]:
POIs.head()

Unnamed: 0,unique_id,osmid,element_type,amenity,geometry,highway,name,public_transport,addr:housenumber,takeaway,...,toilets:wheelchair,reservation,information,name:ang,disease:autism,charge,name:cn,maxheight,toilet,payment:feathercoin
0,node/644630368,644630368,node,fuel,POINT (116.36184 39.77529),,,,,,...,,,,,,,,,,
1,node/644630923,644630923,node,fuel,POINT (116.36298 39.77564),,,,,,...,,,,,,,,,,
2,node/683262746,683262746,node,fuel,POINT (116.33767 39.83148),bus_stop,,,,,...,,,,,,,,,,
3,node/728180870,728180870,node,fuel,POINT (116.32105 39.68177),,,,,,...,,,,,,,,,,
4,node/728181111,728181111,node,fuel,POINT (116.31957 39.68054),,,,,,...,,,,,,,,,,


#### Removing unrated (null) points of interest

In [7]:
POIs = POIs.dropna(subset = ["amenity"], inplace = False)

#### Adapting to the format needed for integration (With labels 'lat' and 'lon' referring to latitude and longitude, respectively)

In [8]:
POIs = POIs[POIs['geometry'].type == 'Point']
POIs['lon'] = POIs['geometry'].x
POIs['lat'] = POIs['geometry'].y

#### Filtering the POI dataframe to keep only the points inside the move_df bbox

In [9]:
pymove.filters.by_bbox(POIs, move_df.get_bbox(), inplace=True)

#### Visualization

In [10]:
m = folium.plot_trajectories_with_folium(move_df)
folium.add_poi_folium(POIs, slice_tags=['amenity'], base_map=m, poi_point='blue')

## 4. Integrating Points of Interest into the DataSet 

In [11]:
df_4 = move_df.copy()
it.join_with_pois(df_4, POIs, label_id='osmid', label_poi_name='name')

Integration with POIs...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

Integration with POI was finalized


### Result

In [12]:
df_4.head()

Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,5572452688,116.862844,太平洋影城(中关村店)
1,39.984198,116.319322,2008-10-23 05:53:06,1,5572452688,119.142692,太平洋影城(中关村店)
2,39.984224,116.319402,2008-10-23 05:53:11,1,5572452688,116.595117,太平洋影城(中关村店)
3,39.984211,116.319389,2008-10-23 05:53:16,1,5572452688,116.257378,太平洋影城(中关村店)
4,39.984217,116.319422,2008-10-23 05:53:21,1,5572452688,114.886759,太平洋影城(中关村店)


### Point of interest closest to each point of the trajectory

In [13]:
df_4['name_poi'].unique()

array(['太平洋影城(中关村店)', '东亚银行', '南京银行', '星巴克', '小吊梨汤', nan, '鑫蜀源', '必胜客',
       '潜渊', '上岛咖啡', '科苑餐厅', '2nd Place', '元绿回转寿司', '中信银行', 'HSBC', '咖啡王',
       '招商银行', '中国建设银行', 'Paradiso Coffee', '798 bar', 'Jazz Cafe',
       'Hundred Years Cafe', '安家小厨', '清青快餐', '听涛园',
       'China Construction Bank', '同仁堂', '北园餐厅', '北京银行', '交通银行', '宁波银行',
       '美嘉欢乐影城', '北京101中学', '西苑医院-哥哥家', 'Yu Xiao Mian Noodles', '茶大爷',
       "McDonald's", 'Pizza Hut', 'Starbucks', '云海肴', '兰州老妈拉面',
       'Пекинский Ботанический сад (Парковка) 植物园停车场'], dtype=object)

## 5. Integrating Points of Interest into the DataSet (Using join_with_pois_optimizer)

#### Selecting data

In [18]:
POIs_5 = POIs[0:10].copy()
POIs_5['type_poi'] = POIs_5['amenity']
df_5 = move_df.copy()

In [19]:
POIs_5['type_poi'].unique()

array(['toilets', 'fast_food', 'massage', 'waste_basket', 'parking',
       'cafe', 'restaurant', 'bank'], dtype=object)

#### Executing the function

In [20]:
it.join_with_pois_optimizer(df_5, POIs_5, label_poi_id='osmid', label_poi_name='name', dist_poi=np.array([100,9,1,50,50,10,20]))

Integration with POIs optimized...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=10)))

[116.319236 116.319322 116.319402 ... 116.196344 116.196346 116.196345]
[116.319236 116.319322 116.319402 ... 116.196344 116.196346 116.196345]
[116.319236 116.319322 116.319402 ... 116.196344 116.196346 116.196345]
[116.319236 116.319322 116.319402 ... 116.196344 116.196346 116.196345]
[116.319236 116.319322 116.319402 ... 116.196344 116.196346 116.196345]
[116.319236 116.319322 116.319402 ... 116.196344 116.196346 116.196345]
[116.319236 116.319322 116.319402 ... 116.196344 116.196346 116.196345]
[116.319236 116.319322 116.319402 ... 116.196344 116.196346 116.196345]
[116.319236 116.319322 116.319402 ... 116.196344 116.196346 116.196345]
Integration with POI was finalized


In [21]:
df_5.head()

Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,269492188,1144.603484,toilets
1,39.984198,116.319322,2008-10-23 05:53:06,1,269492188,1131.338544,toilets
2,39.984224,116.319402,2008-10-23 05:53:11,1,269492188,1124.395459,toilets
3,39.984211,116.319389,2008-10-23 05:53:16,1,269492188,1126.193301,toilets
4,39.984217,116.319422,2008-10-23 05:53:21,1,269492188,1123.69258,toilets


## 6. Integrating Points of Interest into the Category-Based DataSet

### Executing

In [22]:
df_6 = move_df.copy()
it.join_with_pois_by_category(df_6, POIs, label_category='amenity', label_id='osmid')

Integration with POIs...
There are 45 categories


VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

Integration with POI was finalized


In [23]:
df_6.head(10)

Unnamed: 0,lat,lon,datetime,id,id_toilets,dist_toilets,id_fast_food,dist_fast_food,id_massage,dist_massage,...,id_food_court,dist_food_court,id_kindergarten,dist_kindergarten,id_dojo,dist_dojo,id_veterinary,dist_veterinary,id_library,dist_library
0,39.984094,116.319236,2008-10-23 05:53:05,1,1747557685,759.086189,4454637794,746.795383,276320142,1829.070918,...,5513051643,6057.080173,6043578775,10355.667263,5956304807,9900.192647,6946819367,794.656508,7072425636,2117.356677
1,39.984198,116.319322,2008-10-23 05:53:06,1,1747557685,748.697044,4454637794,759.354841,276320142,1828.951254,...,5513051643,6066.571719,6043578775,10361.77509,5956304807,9904.664237,6946819367,804.668395,7072425636,2106.063943
2,39.984224,116.319402,2008-10-23 05:53:11,1,1747557685,746.89415,4454637794,766.613626,276320142,1824.720741,...,5513051643,6073.829516,6043578775,10368.262902,5956304807,9910.636583,6946819367,812.000556,7072425636,2103.444432
3,39.984211,116.319389,2008-10-23 05:53:16,1,1747557685,748.145958,4454637794,764.884374,276320142,1824.886604,...,5513051643,6072.454104,6043578775,10367.307394,5956304807,9909.889396,6946819367,810.556457,7072425636,2104.843483
4,39.984217,116.319422,2008-10-23 05:53:21,1,1747557685,747.935642,4454637794,767.584671,276320142,1822.864349,...,5513051643,6075.342404,6043578775,10370.036941,5956304807,9912.473512,6946819367,813.444981,7072425636,2104.292926
5,39.98471,116.319865,2008-10-23 05:53:23,1,1365191805,696.31212,1817782002,823.553102,276320142,1821.434719,...,5513051643,6123.498669,6043578775,10402.199017,5956304807,9936.858388,6946819367,865.140914,7072425636,2051.495375
6,39.984674,116.31981,2008-10-23 05:53:28,1,1365191805,701.981805,1817782002,819.831406,276320142,1823.053098,...,5513051643,6118.082745,6043578775,10397.918507,5956304807,9933.183578,6946819367,859.457867,7072425636,2055.208921
7,39.984623,116.319773,2008-10-23 05:53:33,1,1365191805,708.463633,4454637794,817.940987,276320142,1822.486938,...,5513051643,6113.823188,6043578775,10395.327425,5956304807,9931.379313,6946819367,854.744821,7072425636,2060.686833
8,39.984606,116.319732,2008-10-23 05:53:38,1,1747557685,710.278329,4454637794,814.000513,276320142,1824.330899,...,5513051643,6110.014567,6043578775,10392.03359,5956304807,9928.398538,6946819367,850.846985,7072425636,2062.376862
9,39.984555,116.319728,2008-10-23 05:53:43,1,1747557685,715.757346,4454637794,810.430872,276320142,1821.4606,...,5513051643,6108.511161,6043578775,10392.245379,5956304807,9929.339455,6946819367,848.850348,7072425636,2068.020185


## 7. Integrating events (points of interest with timestamp) to the DataSet

It integrates a normal dataframe with Points of interest of events, that is, in addition to the labels referring to latitude and longitude, it also has a label referring to the datetime in which the event occurred. In this example, we will assign random date and time values to some POIs to simulate an operation.

In [24]:
indexOfPois = np.arange(0, POIs.shape[0], POIs.shape[0]/20, dtype=np.int64)
POIs_events = POIs.iloc[indexOfPois].copy()

In [25]:
randomIndexOfMoveDf = np.arange(0, move_df.shape[0], move_df.shape[0]/20, dtype=np.int64)
randomMoveDfSlice = move_df.iloc[randomIndexOfMoveDf].copy()

In [26]:
POIs_events['datetime'] = randomMoveDfSlice['datetime'].copy()

In [27]:
df_7 = move_df.copy()

In [28]:
it.join_with_poi_datetime(
    df_7, POIs_events, 
    label_date='datetime', time_window=900, 
    label_event_id='osmid', label_event_type='amenity'
)

Integration with Events...


VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

Integration with event was completed


In [29]:
df_7.head()

Unnamed: 0,lat,lon,datetime,id,osmid,dist_event,amenity
0,39.984094,116.319236,2008-10-23 05:53:05,1,269492188,4422.237186,toilets
1,39.984198,116.319322,2008-10-23 05:53:06,1,269492188,4430.488277,toilets
2,39.984224,116.319402,2008-10-23 05:53:11,1,269492188,4437.521909,toilets
3,39.984211,116.319389,2008-10-23 05:53:16,1,269492188,4436.29731,toilets
4,39.984217,116.319422,2008-10-23 05:53:21,1,269492188,4439.154806,toilets


## 8. Optimized Integrating events (points of interest with timestamp) to the DataSet 

In [31]:
df_8 = move_df.copy()
POIs_events["event_id"] = POIs_events["osmid"]
POIs_events["event_type"] = POIs_events["amenity"]
it.join_with_poi_datetime_optimizer(df_8, POIs_events)

Integration with Events...


VBox(children=(HTML(value=''), IntProgress(value=0, max=20)))

Integration with events was completed


In [32]:
df_8.head()

Unnamed: 0,lat,lon,datetime,id,event_id,dist_event,event_type
0,39.984094,116.319236,2008-10-23 05:53:05,1,269492188,875.362306,toilets
1,39.984198,116.319322,2008-10-23 05:53:06,1,269492188,872.603727,toilets
2,39.984224,116.319402,2008-10-23 05:53:11,1,269492188,867.307473,toilets
3,39.984211,116.319389,2008-10-23 05:53:16,1,269492188,867.809534,toilets
4,39.984217,116.319422,2008-10-23 05:53:21,1,269492188,865.436752,toilets


## 9. Integration with Point of Interest HOME

The Home type contains, in addition to latitude, longitude and id, the address and city labels.

### Creating a home point

In [33]:
df_9 = move_df.copy()
home_df = df_9.iloc[300:302].copy()
home_df['formatted_address'] = ['Rua1, n02', 'Rua2, n03']
home_df['city'] = ['ChinaTown', 'ChinaTown']

### Using the function

In [34]:
it.join_with_home_by_id(df_9, home_df, label_id='id')

Integration with Home...
...setting id as index
{'id'} {'datetime', 'lat', 'lon'}


VBox(children=(HTML(value=''), IntProgress(value=0, max=1)))

... Resetting index


In [35]:
df_9.head()

Unnamed: 0,id,lat,lon,datetime,dist_home,home,city
0,1,39.984094,116.319236,2008-10-23 05:53:05,1031.34837,"Rua1, n02",ChinaTown
1,1,39.984198,116.319322,2008-10-23 05:53:06,1017.690147,"Rua1, n02",ChinaTown
2,1,39.984224,116.319402,2008-10-23 05:53:11,1011.332141,"Rua1, n02",ChinaTown
3,1,39.984211,116.319389,2008-10-23 05:53:16,1013.1527,"Rua1, n02",ChinaTown
4,1,39.984217,116.319422,2008-10-23 05:53:21,1010.95922,"Rua1, n02",ChinaTown


## 10. Merge of HOME with DataSet already integrated with POIs

### Integration

In [36]:
it.join_with_pois(df_9, POIs, label_id='osmid', label_poi_name='name')

Integration with POIs...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

Integration with POI was finalized


In [37]:
it.merge_home_with_poi(df_9)

merge home with POI using shortest distance


In [38]:
df_9.head()

Unnamed: 0,id,lat,lon,datetime,city,id_poi,dist_poi,name_poi
0,1,39.984094,116.319236,2008-10-23 05:53:05,ChinaTown,5572452688,116.862844,太平洋影城(中关村店)
1,1,39.984198,116.319322,2008-10-23 05:53:06,ChinaTown,5572452688,119.142692,太平洋影城(中关村店)
2,1,39.984224,116.319402,2008-10-23 05:53:11,ChinaTown,5572452688,116.595117,太平洋影城(中关村店)
3,1,39.984211,116.319389,2008-10-23 05:53:16,ChinaTown,5572452688,116.257378,太平洋影城(中关村店)
4,1,39.984217,116.319422,2008-10-23 05:53:21,ChinaTown,5572452688,114.886759,太平洋影城(中关村店)


## 11. Union functions

They have the purpose of joining several types of POI that mean the same thing, or similar things, in a single type of POI

#### Union of Banks

Converts POIs of the types "bank_filials", "bank_agencies", "bank_posts", "bank_PAE" and "bank" to a single type: "banks"

In [39]:
df_banks = move_df.copy()

#We create POIs with different type_poi that describe different types of banks to test
indexes_bp = np.linspace(0, df_banks.shape[0], 6)
banks_pois = df_banks[df_banks.index.isin(indexes_bp)].copy()
banks_pois['id'] = [0,1,2,3,4]
banks_pois['type_poi'] = ['bancos_filiais', 'bancos_agencias', 'bancos_postos', 'bancos_PAE', 'bank']

banks_pois.head()

Unnamed: 0,lat,lon,datetime,id,type_poi
0,39.984094,116.319236,2008-10-23 05:53:05,0,bancos_filiais
1000,40.014125,116.306159,2008-10-23 23:43:56,1,bancos_agencias
2000,39.979558,116.312653,2008-10-24 03:26:10,2,bancos_postos
3000,39.97937,116.320649,2008-10-24 06:31:04,3,bancos_PAE
4000,40.003274,116.267484,2008-10-25 00:54:34,4,bank


In [40]:
#Join with POIs
it.join_with_pois(df_banks, banks_pois, label_id='id', label_poi_name='type_poi')

Integration with POIs...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

Integration with POI was finalized


In [41]:
#Result
df_banks.head(10)

Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,0,0.0,bancos_filiais
1,39.984198,116.319322,2008-10-23 05:53:06,1,0,13.690153,bancos_filiais
2,39.984224,116.319402,2008-10-23 05:53:11,1,0,20.223428,bancos_filiais
3,39.984211,116.319389,2008-10-23 05:53:16,1,0,18.416895,bancos_filiais
4,39.984217,116.319422,2008-10-23 05:53:21,1,0,20.933073,bancos_filiais
5,39.98471,116.319865,2008-10-23 05:53:23,1,0,86.969343,bancos_filiais
6,39.984674,116.31981,2008-10-23 05:53:28,1,0,80.938365,bancos_filiais
7,39.984623,116.319773,2008-10-23 05:53:33,1,0,74.520547,bancos_filiais
8,39.984606,116.319732,2008-10-23 05:53:38,1,0,70.901768,bancos_filiais
9,39.984555,116.319728,2008-10-23 05:53:43,1,0,66.217975,bancos_filiais


In [42]:
#Checking the amount of each point assigned to each type of poi
bancos_filiais = df_banks.loc[df_banks['name_poi'] == 'bancos_filiais']
bancos_agencias = df_banks.loc[df_banks['name_poi'] == 'bancos_agencias']
bancos_postos = df_banks.loc[df_banks['name_poi'] == 'bancos_postos']
bancos_PAE = df_banks.loc[df_banks['name_poi'] == 'bancos_PA']
bank = df_banks.loc[df_banks['name_poi'] == 'bank']

print("Number of points close to each bank definition")
print("bancos_filiais: ", bancos_filiais.shape[0])
print("bancos_agencias: ", bancos_agencias.shape[0])
print("bancos_postos: ", bancos_postos.shape[0])
print("bancos_PAE: ", bancos_PAE.shape[0])
print("bank: ", bank.shape[0])

Number of points close to each bank definition
bancos_filiais:  579
bancos_agencias:  1407
bancos_postos:  916
bancos_PAE:  0
bank:  1238


In [43]:
#Finally, the Union
it.union_poi_bank(df_banks, label_poi="name_poi")

#Result
df_banks.head()

union bank categories to one category
... There are 5 -- name_poi


Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,0,0.0,banks
1,39.984198,116.319322,2008-10-23 05:53:06,1,0,13.690153,banks
2,39.984224,116.319402,2008-10-23 05:53:11,1,0,20.223428,banks
3,39.984211,116.319389,2008-10-23 05:53:16,1,0,18.416895,banks
4,39.984217,116.319422,2008-10-23 05:53:21,1,0,20.933073,banks


In [44]:
#Checking
df_banks.loc[df_banks['name_poi'] == 'banks'].shape[0]

5000

#### Union of Bus Stations

Converts "transit_station" and "bus_points" POIs to a single type: "bus_station"

In [45]:
df_bus = move_df.copy()


#We create POIs with different name_poi that describe different types of bus stops to test
indexes_bp = np.linspace(0, df_bus.shape[0], 6)
bus_pois = df_bus[df_bus.index.isin(indexes_bp)].copy()
bus_pois['id'] = [0,1,2,3,4]
bus_pois['name_poi'] = ['transit_station', 'transit_station', 'pontos_de_onibus', 'transit_station', 'pontos_de_onibus']

#Result
bus_pois.head()

Unnamed: 0,lat,lon,datetime,id,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,0,transit_station
1000,40.014125,116.306159,2008-10-23 23:43:56,1,transit_station
2000,39.979558,116.312653,2008-10-24 03:26:10,2,pontos_de_onibus
3000,39.97937,116.320649,2008-10-24 06:31:04,3,transit_station
4000,40.003274,116.267484,2008-10-25 00:54:34,4,pontos_de_onibus


In [46]:
#Integration
it.join_with_pois(df_bus, bus_pois, label_id='id', label_poi_name='name_poi')

Integration with POIs...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

Integration with POI was finalized


In [47]:
#Result
df_bus.head()

Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,0,0.0,transit_station
1,39.984198,116.319322,2008-10-23 05:53:06,1,0,13.690153,transit_station
2,39.984224,116.319402,2008-10-23 05:53:11,1,0,20.223428,transit_station
3,39.984211,116.319389,2008-10-23 05:53:16,1,0,18.416895,transit_station
4,39.984217,116.319422,2008-10-23 05:53:21,1,0,20.933073,transit_station


In [48]:
transit_station = df_bus.loc[df_bus['name_poi'] == 'transit_station']
pontos_de_onibus = df_bus.loc[df_bus['name_poi'] == 'pontos_de_onibus']

print("Number of points near transit_station's: ", transit_station.shape[0])
print("Number of points close to pontos_de_onibus's: ", pontos_de_onibus.shape[0])

Number of points near transit_station's:  2846
Number of points close to pontos_de_onibus's:  2154


In [49]:
#The union function
it.union_poi_bus_station(df_bus, label_poi="name_poi")

df_bus.head()

union bus station categories to one category


Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,0,0.0,bus_station
1,39.984198,116.319322,2008-10-23 05:53:06,1,0,13.690153,bus_station
2,39.984224,116.319402,2008-10-23 05:53:11,1,0,20.223428,bus_station
3,39.984211,116.319389,2008-10-23 05:53:16,1,0,18.416895,bus_station
4,39.984217,116.319422,2008-10-23 05:53:21,1,0,20.933073,bus_station


In [50]:
#Checking

df_bus.loc[df_bus['name_poi'] == 'bus_station'].shape[0]

5000

#### Union of Bars and Restaurants

Converts "bar" and "restaurant" POIs to a single type: "bar-restaurant"

In [51]:
df_bar = move_df.copy()

#We create POIs with both types
indexes_br = np.linspace(0, df_bar.shape[0], 5)
br_POIs = df_bar[df_bar.index.isin(indexes_br)].copy()
br_POIs['name_poi'] = ['bar','restaurant','restaurant', 'bar']

#Result
br_POIs.head()

Unnamed: 0,lat,lon,datetime,id,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,bar
1250,39.999756,116.322556,2008-10-23 23:58:02,1,restaurant
2500,39.979533,116.323162,2008-10-24 05:31:19,1,restaurant
3750,39.996251,116.293837,2008-10-25 00:40:56,1,bar


In [52]:
#Integration
it.join_with_pois(df_bar, br_POIs, label_id='id', label_poi_name='name_poi')

Integration with POIs...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

Integration with POI was finalized


In [53]:
#Result
df_bar.head()

Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,1,0.0,bar
1,39.984198,116.319322,2008-10-23 05:53:06,1,1,13.690153,bar
2,39.984224,116.319402,2008-10-23 05:53:11,1,1,20.223428,bar
3,39.984211,116.319389,2008-10-23 05:53:16,1,1,18.416895,bar
4,39.984217,116.319422,2008-10-23 05:53:21,1,1,20.933073,bar


In [54]:
#Number of points close to each type
bar = df_bar.loc[df_bar['name_poi'] == 'bar']
restaurant = df_bar.loc[df_bar['name_poi'] == 'restaurant']

print("Closest type points 'bar': ", bar.shape[0])
print("Closest type points 'restaurant': ", restaurant.shape[0])

Closest type points 'bar':  2539
Closest type points 'restaurant':  2461


In [55]:
#Union of the two types of POIs into a single
it.union_poi_bar_restaurant(df_bar, label_poi="name_poi")

#Result
df_bar.head()

union restaurant and bar categories to one category


Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,1,0.0,bar-restaurant
1,39.984198,116.319322,2008-10-23 05:53:06,1,1,13.690153,bar-restaurant
2,39.984224,116.319402,2008-10-23 05:53:11,1,1,20.223428,bar-restaurant
3,39.984211,116.319389,2008-10-23 05:53:16,1,1,18.416895,bar-restaurant
4,39.984217,116.319422,2008-10-23 05:53:21,1,1,20.933073,bar-restaurant


In [56]:
#Checking
df_bar.loc[df_bar['name_poi'] == 'bar-restaurant'].shape[0]

5000

#### Union of Parks

Converts "pracas_e_parques" and "park" POIs to a single type: "parks"

In [57]:
df_parks = move_df.copy()

#We create POIs with both types
indexes_p = np.linspace(0, df_parks.shape[0], 5)
p_POIs = df_parks[df_parks.index.isin(indexes_p)].copy()
p_POIs['name_poi'] = ['pracas_e_parques','pracas_e_parques','park', 'park']

#Result
p_POIs.head()

Unnamed: 0,lat,lon,datetime,id,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,pracas_e_parques
1250,39.999756,116.322556,2008-10-23 23:58:02,1,pracas_e_parques
2500,39.979533,116.323162,2008-10-24 05:31:19,1,park
3750,39.996251,116.293837,2008-10-25 00:40:56,1,park


In [58]:
#Integration
it.join_with_pois(df_parks, p_POIs, label_id='id', label_poi_name='name_poi')

#Result
df_parks.head()

Integration with POIs...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

Integration with POI was finalized


Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,1,0.0,pracas_e_parques
1,39.984198,116.319322,2008-10-23 05:53:06,1,1,13.690153,pracas_e_parques
2,39.984224,116.319402,2008-10-23 05:53:11,1,1,20.223428,pracas_e_parques
3,39.984211,116.319389,2008-10-23 05:53:16,1,1,18.416895,pracas_e_parques
4,39.984217,116.319422,2008-10-23 05:53:21,1,1,20.933073,pracas_e_parques


In [59]:
#Number of points close to each type of POI
pracas_e_parques = df_parks.loc[df_parks['name_poi'] == 'pracas_e_parques']
park = df_parks.loc[df_parks['name_poi'] == 'park']

print("Number of points closest to pracas_e_parques: ", pracas_e_parques.shape[0])
print("Number of points closest to park: ", park.shape[0])

Number of points closest to pracas_e_parques:  2716
Number of points closest to park:  2284


In [60]:
#Union function
it.union_poi_parks(df_parks, label_poi="name_poi")

df_parks.head()

union parks categories to one category


Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,1,0.0,parks
1,39.984198,116.319322,2008-10-23 05:53:06,1,1,13.690153,parks
2,39.984224,116.319402,2008-10-23 05:53:11,1,1,20.223428,parks
3,39.984211,116.319389,2008-10-23 05:53:16,1,1,18.416895,parks
4,39.984217,116.319422,2008-10-23 05:53:21,1,1,20.933073,parks


In [61]:
#Checking the new quantity
df_parks.loc[df_parks['name_poi'] == 'parks'].shape[0]

5000

#### Union of police points

In [62]:
df_police = move_df.copy()

#We create POIs with both types
indexes_pol = np.linspace(0, df_police.shape[0], 5)
pol_POIs = df_police[df_police.index.isin(indexes_pol)].copy()
pol_POIs['name_poi'] = ['distritos_policiais','police','distritos_policiais', 'distritos_policiais']

#Result
pol_POIs.head()

Unnamed: 0,lat,lon,datetime,id,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,distritos_policiais
1250,39.999756,116.322556,2008-10-23 23:58:02,1,police
2500,39.979533,116.323162,2008-10-24 05:31:19,1,distritos_policiais
3750,39.996251,116.293837,2008-10-25 00:40:56,1,distritos_policiais


In [63]:
#Integration
it.join_with_pois(df_police, pol_POIs, label_id='id', label_poi_name='name_poi')

df_police.head()

Integration with POIs...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=5000)))

Integration with POI was finalized


Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,1,0.0,distritos_policiais
1,39.984198,116.319322,2008-10-23 05:53:06,1,1,13.690153,distritos_policiais
2,39.984224,116.319402,2008-10-23 05:53:11,1,1,20.223428,distritos_policiais
3,39.984211,116.319389,2008-10-23 05:53:16,1,1,18.416895,distritos_policiais
4,39.984217,116.319422,2008-10-23 05:53:21,1,1,20.933073,distritos_policiais


In [64]:
#Quantity of points closest to each type of point
distritos_policiais = df_police.loc[df_police['name_poi'] == 'distritos_policiais']

print("Number of points closest to distritos_policiais: ", distritos_policiais.shape[0])

Number of points closest to distritos_policiais:  3420


In [65]:
#Union funcion
it.union_poi_police(df_police, label_poi="name_poi")

union distritos policies and police categories


In [66]:
#Result
df_police.head()

Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,1,0.0,police
1,39.984198,116.319322,2008-10-23 05:53:06,1,1,13.690153,police
2,39.984224,116.319402,2008-10-23 05:53:11,1,1,20.223428,police
3,39.984211,116.319389,2008-10-23 05:53:16,1,1,18.416895,police
4,39.984217,116.319422,2008-10-23 05:53:21,1,1,20.933073,police


In [67]:
#Checking
df_police.loc[df_police['name_poi'] == 'police'].shape[0]

5000

## 12. Integração entre trajetórias e áreas coletivas

In [68]:
df_pd = pd.read_csv('geolife_sample.csv')
df_12 = df_pd[0:2000]
gdf = geopandas.GeoDataFrame(df_12, geometry=geopandas.points_from_xy(df_12.lon, df_12.lat))
gdf.head()

Unnamed: 0,lat,lon,datetime,id,geometry
0,39.984094,116.319236,2008-10-23 05:53:05,1,POINT (116.31924 39.98409)
1,39.984198,116.319322,2008-10-23 05:53:06,1,POINT (116.31932 39.98420)
2,39.984224,116.319402,2008-10-23 05:53:11,1,POINT (116.31940 39.98422)
3,39.984211,116.319389,2008-10-23 05:53:16,1,POINT (116.31939 39.98421)
4,39.984217,116.319422,2008-10-23 05:53:21,1,POINT (116.31942 39.98422)


In [69]:
#Creating collective areas
indexes_ac = np.linspace(0, gdf.shape[0], 5)
area_c = df_12[df_12.index.isin(indexes_ac)].copy()
area_c

Unnamed: 0,lat,lon,datetime,id,geometry
0,39.984094,116.319236,2008-10-23 05:53:05,1,POINT (116.31924 39.98409)
500,40.006436,116.317701,2008-10-23 10:53:31,1,POINT (116.31770 40.00644)
1000,40.014125,116.306159,2008-10-23 23:43:56,1,POINT (116.30616 40.01412)
1500,39.979009,116.326873,2008-10-24 00:11:29,1,POINT (116.32687 39.97901)


In [70]:
#Integration
it.join_collective_areas(gdf, area_c)

Integration between trajectories and collectives areas


VBox(children=(HTML(value=''), IntProgress(value=0, max=4)))

In [71]:
gdf.head()

Unnamed: 0,lat,lon,datetime,id,geometry,violating
0,39.984094,116.319236,2008-10-23 05:53:05,1,POINT (116.31924 39.98409),True
1,39.984198,116.319322,2008-10-23 05:53:06,1,POINT (116.31932 39.98420),False
2,39.984224,116.319402,2008-10-23 05:53:11,1,POINT (116.31940 39.98422),False
3,39.984211,116.319389,2008-10-23 05:53:16,1,POINT (116.31939 39.98421),False
4,39.984217,116.319422,2008-10-23 05:53:21,1,POINT (116.31942 39.98422),False


### Viewing points on the map

#### Collective Area points 

In [72]:
folium.plot_markers(area_c)

#### Corresponding points

In [73]:
folium.plot_markers(gdf.loc[gdf['violating'] == True])