# #06 - Exploring Integrations

## 1. Imports

In [1]:
import pymove
from pymove.utils import integration as it
from pymove.visualization import folium
from pymove import MoveDataFrame
import numpy as np
import pandas as pd

## 2. Load Data

In [134]:
import pandas as pd

df = pd.read_csv('geolife_sample.csv', parse_dates=['datetime'])
move_df = MoveDataFrame(data=df, latitude="lat", longitude="lon", datetime="datetime")

move_df.head()

Unnamed: 0,lat,lon,datetime,id
0,39.984094,116.319236,2008-10-23 05:53:05,1
1,39.984198,116.319322,2008-10-23 05:53:06,1
2,39.984224,116.319402,2008-10-23 05:53:11,1
3,39.984211,116.319389,2008-10-23 05:53:16,1
4,39.984217,116.319422,2008-10-23 05:53:21,1


#### Size

In [5]:
#Tamanho
move_df.shape[0]

217653

#### Visualization

In [6]:
folium.plot_trajectories_with_folium(move_df, n_rows=10000)

## 3. Loading points of interest

In [7]:
import osmnx as ox

place = 'Beijing, China'
tags = {'amenity':True}
POIs = ox.pois_from_place(place=place, tags=tags, which_result=2)

  arr_value = np.array(value)


In [8]:
POIs.head()

Unnamed: 0,osmid,geometry,amenity,internet_access,name,shop,element_type,fee,created_by,name:en,...,townhall:type,alt_name_1,preschool,trolleybus,payment:wechatpay,capacity:charging,parking_space,type,ways,name:ja_kana
60170164,60170164,POINT (116.44976 39.93110),restaurant,wlan,The Bookworm,books,node,,,,...,,,,,,,,,,
269492188,269492188,POINT (116.26750 39.98087),toilets,,,,node,no,,,...,,,,,,,,,,
269693162,269693162,POINT (116.41313 39.88168),parking,,,,node,,Potlatch 0.9c,,...,,,,,,,,,,
269704764,269704764,POINT (116.38099 39.92879),ferry_terminal,,,,node,,,,...,,,,,,,,,,
271822412,271822412,POINT (116.15339 39.93954),place_of_worship,,法海寺,,node,,,Fahai Temple,...,,,,,,,,,,


#### Removing unrated (null) points of interest

In [10]:
POIs = POIs.dropna(subset = ["amenity"], inplace = False)

#### Adapting to the format needed for integration (With labels 'lat' and 'lon' referring to latitude and longitude, respectively)

In [143]:
points = POIs['geometry']
latitude = []
longitude = []
count = 0
for point in points:
    if point.type == 'Point': 
      count = count + 1
      latitude.append(point.x)
      longitude.append(point.y)
print("Número de Pontos: ", count)

Número de Pontos:  7781


In [12]:
POIs = POIs[0:count].copy()
POIs['lat'] = latitude
POIs['lon'] = longitude
POIs

POIs.shape[0]

7781

## 4. Integrating Points of Interest into the DataSet 

In [13]:
it.join_with_pois(move_df, POIs, label_id='osmid', label_poi_name='name')

Integration with POIs...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

Integration with POI was finalized


### Result

In [14]:
move_df.head()

Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,4152537289,6644477.0,东湖港山庄
1,39.984198,116.319322,2008-10-23 05:53:06,1,4152537289,6644463.0,东湖港山庄
2,39.984224,116.319402,2008-10-23 05:53:11,1,4152537289,6644457.0,东湖港山庄
3,39.984211,116.319389,2008-10-23 05:53:16,1,4152537289,6644459.0,东湖港山庄
4,39.984217,116.319422,2008-10-23 05:53:21,1,4152537289,6644457.0,东湖港山庄


### Point of interest closest to the trajectory

In [15]:
move_df['name_poi'].unique()

array(['东湖港山庄'], dtype=object)

In [16]:
POIs['type_poi'] = POIs['name']

## 5. Integrating Points of Interest into the DataSet (Using join_with_pois_optimizer)

In [17]:
it.join_with_pois_optimizer(move_df, POIs[0:6], label_poi_id='osmid', label_poi_name='name', dist_poi=np.array([10,9,1,4]))

Integration with POIs optimized...
... Resetting index to operation...


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


VBox(children=(HTML(value=''), IntProgress(value=0, max=6)))

[116.319236 116.319322 116.319402 ... 116.327394 116.327433 116.32746 ]
[116.319236 116.319322 116.319402 ... 116.327394 116.327433 116.32746 ]
[116.319236 116.319322 116.319402 ... 116.327394 116.327433 116.32746 ]
[116.319236 116.319322 116.319402 ... 116.327394 116.327433 116.32746 ]
[116.319236 116.319322 116.319402 ... 116.327394 116.327433 116.32746 ]
Integration with POI was finalized


In [21]:
move_df.head()

Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,60170164,6691872.0,The Bookworm
1,39.984198,116.319322,2008-10-23 05:53:06,1,60170164,6691858.0,The Bookworm
2,39.984224,116.319402,2008-10-23 05:53:11,1,60170164,6691853.0,The Bookworm
3,39.984211,116.319389,2008-10-23 05:53:16,1,60170164,6691854.0,The Bookworm
4,39.984217,116.319422,2008-10-23 05:53:21,1,60170164,6691852.0,The Bookworm


## 6. Integrating Points of Interest into the Category-Based DataSet

### Executing

#### Removing POIs without categories

In [22]:
POIs = POIs.dropna(subset = ["amenity"], inplace = False)
POIs['amenity'].head()

0          restaurant
1             toilets
2             parking
3      ferry_terminal
4    place_of_worship
Name: amenity, dtype: object

In [23]:
it.join_with_pois_by_category(move_df, POIs, label_category='amenity', label_id='osmid')

Integration with POIs...
There are 95 categories
computing dist to category: restaurant


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: toilets


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: parking


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: ferry_terminal


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: place_of_worship


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: cafe


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: pub


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: fast_food


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: massage


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: bus_station


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: bank


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: waste_basket


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: post_office


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: police


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: post_box


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: bicycle_parking


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: fuel


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: school


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: atm


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: bicycle_rental


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: college


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: hospital


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: bar


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: pharmacy


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: nightclub


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: bench


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: cinema


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: fire_station


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: telephone


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: dorm


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: kindergarten


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: vending_machine


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: taxi


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: library


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: townhall


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: swimming_pool


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: shelter


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: waste_disposal


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: car_wash


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: courthouse


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: School


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: theatre


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: university


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: marketplace


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: parking_entrance


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: conference_centre


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: dentist


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: research_institute


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: veterinary


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: toilets;restaurant


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: investment_bank


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: bureau_de_change


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: public_building


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: studio


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: fountain


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: recycling


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: grave_yard


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: clock


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: doctors


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: community_centre


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: bbq


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: charging_station


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: Massage


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: spa


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: embassy


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: drinking_water


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: biergarten


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: clinic


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: bicycle_repair_station


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: public_bookcase


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: arts_centre


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: hanging rings


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: punching_bag


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: billiards


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: pingpong


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: training


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: ice_cream


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: childcare


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: social_facility


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: parking_space


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: boat_rental


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: food_court


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: disused


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: toilets;post_box


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: casino


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: motorcycle_parking


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: public_bath


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: internet_cafe


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: events_venue


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: dojo


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: ranger_station


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: monument


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: music_school


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: bus_stop


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

computing dist to category: car_rental


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

Integration with POI was finalized


In [24]:
move_df.head(10)

Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi,id_restaurant,dist_restaurant,id_toilets,...,id_ranger_station,dist_ranger_station,id_monument,dist_monument,id_music_school,dist_music_school,id_bus_stop,dist_bus_stop,id_car_rental,dist_car_rental
0,39.984094,116.319236,2008-10-23 05:53:05,1,60170164,6691872.0,The Bookworm,4152537289,6644477.0,4152885689,...,5960914112,6753720.0,6298821904,6690501.0,7419675455,6703047.0,6815333802,6704564.0,7840263731,6710081.0
1,39.984198,116.319322,2008-10-23 05:53:06,1,60170164,6691858.0,The Bookworm,4152537289,6644463.0,4152885689,...,5960914112,6753706.0,6298821904,6690487.0,7419675455,6703034.0,6815333802,6704551.0,7840263731,6710067.0
2,39.984224,116.319402,2008-10-23 05:53:11,1,60170164,6691853.0,The Bookworm,4152537289,6644457.0,4152885689,...,5960914112,6753700.0,6298821904,6690481.0,7419675455,6703028.0,6815333802,6704545.0,7840263731,6710061.0
3,39.984211,116.319389,2008-10-23 05:53:16,1,60170164,6691854.0,The Bookworm,4152537289,6644459.0,4152885689,...,5960914112,6753702.0,6298821904,6690483.0,7419675455,6703030.0,6815333802,6704547.0,7840263731,6710063.0
4,39.984217,116.319422,2008-10-23 05:53:21,1,60170164,6691852.0,The Bookworm,4152537289,6644457.0,4152885689,...,5960914112,6753700.0,6298821904,6690481.0,7419675455,6703028.0,6815333802,6704545.0,7840263731,6710061.0
5,39.98471,116.319865,2008-10-23 05:53:23,1,60170164,6691786.0,The Bookworm,4152537289,6644391.0,4152885689,...,5960914112,6753634.0,6298821904,6690415.0,7419675455,6702961.0,6815333802,6704478.0,7840263731,6709995.0
6,39.984674,116.31981,2008-10-23 05:53:28,1,60170164,6691792.0,The Bookworm,4152537289,6644397.0,4152885689,...,5960914112,6753640.0,6298821904,6690421.0,7419675455,6702967.0,6815333802,6704484.0,7840263731,6710000.0
7,39.984623,116.319773,2008-10-23 05:53:33,1,60170164,6691798.0,The Bookworm,4152537289,6644403.0,4152885689,...,5960914112,6753646.0,6298821904,6690427.0,7419675455,6702974.0,6815333802,6704491.0,7840263731,6710007.0
8,39.984606,116.319732,2008-10-23 05:53:38,1,60170164,6691802.0,The Bookworm,4152537289,6644406.0,4152885689,...,5960914112,6753649.0,6298821904,6690431.0,7419675455,6702977.0,6815333802,6704494.0,7840263731,6710010.0
9,39.984555,116.319728,2008-10-23 05:53:43,1,60170164,6691807.0,The Bookworm,4152537289,6644412.0,4152885689,...,5960914112,6753654.0,6298821904,6690436.0,7419675455,6702982.0,6815333802,6704499.0,7840263731,6710015.0


## 7. Integrating events (points of interest with timestamp) to the DataSet

It integrates a normal dataframe with a CVP with information from Points of Interest. In this example, we will assign random datetime values to some POIs to simulate the operation.

In [64]:
move_df = MoveDataFrame(data=df, latitude="lat", longitude="lon", datetime="datetime")

In [46]:
indexOfPois = np.arange(0, POIs.shape[0], POIs.shape[0]/20, dtype=np.int64)
indexOfPois

array([   0,  389,  778, 1167, 1556, 1945, 2334, 2723, 3112, 3501, 3890,
       4279, 4668, 5057, 5446, 5835, 6224, 6613, 7002, 7391], dtype=int64)

In [57]:
POIs_events = POIs.iloc[indexOfPois].copy()

In [58]:
indexOfMoveDf = np.arange(0, move_df.shape[0], move_df.shape[0]/20, dtype=np.int64)
move_df_random = move_df.iloc[indexOfMoveDf].copy()

In [59]:
POIs_events['datetime'] = move_df_random['datetime'].copy()

In [62]:
it.join_with_poi_datetime(move_df, POIs_events, label_date='datetime', time_window=900, label_event_id='osmid', label_event_type='amenity')

Integration with Events...


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

Integration with event was completed


In [63]:
move_df.head()

Unnamed: 0,lat,lon,datetime,id,osmid,dist_event,amenity
0,39.984094,116.319236,2008-10-23 05:53:05,1,60170164,6708460.0,restaurant
1,39.984198,116.319322,2008-10-23 05:53:06,1,60170164,6708446.0,restaurant
2,39.984224,116.319402,2008-10-23 05:53:11,1,60170164,6708440.0,restaurant
3,39.984211,116.319389,2008-10-23 05:53:16,1,60170164,6708442.0,restaurant
4,39.984217,116.319422,2008-10-23 05:53:21,1,60170164,6708440.0,restaurant


## 8. Optimized Integrating events (points of interest with timestamp) to the DataSet 

In [68]:
move_df = MoveDataFrame(data=df, latitude="lat", longitude="lon", datetime="datetime")
POIs_events["event_id"] = POIs_events["osmid"]
POIs_events["event_type"] = POIs_events["amenity"]
it.join_with_poi_datetime_optimizer(move_df, POIs_events)

Integration with Events...


VBox(children=(HTML(value=''), IntProgress(value=0, max=20)))

Integration with events was completed


In [70]:
move_df

Unnamed: 0,lat,lon,datetime,id,event_id,dist_event,event_type
0,39.984094,116.319236,2008-10-23 05:53:05,1,60170164,6.693211e+06,restaurant
1,39.984198,116.319322,2008-10-23 05:53:06,1,60170164,6.693197e+06,restaurant
2,39.984224,116.319402,2008-10-23 05:53:11,1,60170164,6.693192e+06,restaurant
3,39.984211,116.319389,2008-10-23 05:53:16,1,60170164,6.693193e+06,restaurant
4,39.984217,116.319422,2008-10-23 05:53:21,1,60170164,6.693191e+06,restaurant
...,...,...,...,...,...,...,...
217648,39.999896,116.327290,2009-03-19 05:46:02,5,,inf,
217649,39.999899,116.327352,2009-03-19 05:46:07,5,,inf,
217650,39.999945,116.327394,2009-03-19 05:46:12,5,,inf,
217651,40.000015,116.327433,2009-03-19 05:46:17,5,,inf,


## 9. Integration with Point of Interest HOME

The Home type contains, in addition to latitude, longitude and id, the address and city labels.

### Creating a home point

In [84]:
move_df = MoveDataFrame(data=df, latitude="lat", longitude="lon", datetime="datetime")
home_df = move_df.iloc[300:302].copy()
home_df['formatted_address'] = ['Rua1, n02', 'Rua2, n03']
home_df['city'] = ['ChinaTown', 'ChinaTown']


### Using the function

In [85]:
move_df = move_df.iloc[0:2000].copy()
it.join_with_home_by_id(move_df, home_df, label_id='id')

Integration with Home...
...setting id as index


VBox(children=(HTML(value=''), IntProgress(value=0, max=1)))

... Resetting index


In [86]:
move_df.head()

Unnamed: 0,id,lat,lon,datetime,dist_home,home,city
0,1,39.984094,116.319236,2008-10-23 05:53:05,1031.34837,"Rua1, n02",ChinaTown
1,1,39.984198,116.319322,2008-10-23 05:53:06,1017.690147,"Rua1, n02",ChinaTown
2,1,39.984224,116.319402,2008-10-23 05:53:11,1011.332141,"Rua1, n02",ChinaTown
3,1,39.984211,116.319389,2008-10-23 05:53:16,1013.1527,"Rua1, n02",ChinaTown
4,1,39.984217,116.319422,2008-10-23 05:53:21,1010.95922,"Rua1, n02",ChinaTown


## 10. Merge of HOME with DataSet already integrated with POIs

### Integration

In [87]:
it.join_with_pois(move_df, POIs, label_id='osmid', label_poi_name='name')

Integration with POIs...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=2000)))

Integration with POI was finalized


In [88]:
it.merge_home_with_poi(move_df)

merge home with POI using shortest distance


In [89]:
move_df.head()

Unnamed: 0,id,lat,lon,datetime,city,id_poi,dist_poi,name_poi
0,1,39.984094,116.319236,2008-10-23 05:53:05,ChinaTown,"Rua1, n02",1031.34837,home
1,1,39.984198,116.319322,2008-10-23 05:53:06,ChinaTown,"Rua1, n02",1017.690147,home
2,1,39.984224,116.319402,2008-10-23 05:53:11,ChinaTown,"Rua1, n02",1011.332141,home
3,1,39.984211,116.319389,2008-10-23 05:53:16,ChinaTown,"Rua1, n02",1013.1527,home
4,1,39.984217,116.319422,2008-10-23 05:53:21,ChinaTown,"Rua1, n02",1010.95922,home


## 11. Union functions

They have the purpose of joining several types of POI that mean the same thing, or similar things, in a single type of POI

#### Union of Banks

Converts POIs of the types "bank_filials", "bank_agencies", "bank_posts", "bank_PAE" and "bank" to a single type: "banks"

In [90]:
move_df = MoveDataFrame(data=df, latitude="lat", longitude="lon", datetime="datetime")

#We create POIs with different type_poi that describe different types of banks to test
indexes_bp = np.linspace(0, move_df.shape[0], 6)
banks_pois = move_df[move_df.index.isin(indexes_bp)].copy()
banks_pois['id'] = [0,1,2,3,4]
banks_pois['type_poi'] = ['bancos_filiais', 'bancos_agencias', 'bancos_postos', 'bancos_PAE', 'bank']

banks_pois.head()

Unnamed: 0,lat,lon,datetime,id,type_poi
0,39.984094,116.319236,2008-10-23 05:53:05,0,bancos_filiais
43530,40.013694,116.311779,2008-11-08 04:06:39,1,bancos_agencias
87061,39.966366,116.355678,2008-12-09 01:53:08,2,bancos_postos
130591,40.004587,116.313867,2008-11-01 07:02:35,3,bancos_PAE
174122,22.162494,113.553571,2008-11-30 06:12:41,4,bank


In [92]:
#Join with POIs
it.join_with_pois(move_df, banks_pois, label_id='id', label_poi_name='type_poi')

Integration with POIs...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

Integration with POI was finalized


In [93]:
#Result
move_df.head(10)

Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,0,0.0,bancos_filiais
1,39.984198,116.319322,2008-10-23 05:53:06,1,0,13.690153,bancos_filiais
2,39.984224,116.319402,2008-10-23 05:53:11,1,0,20.223428,bancos_filiais
3,39.984211,116.319389,2008-10-23 05:53:16,1,0,18.416895,bancos_filiais
4,39.984217,116.319422,2008-10-23 05:53:21,1,0,20.933073,bancos_filiais
5,39.98471,116.319865,2008-10-23 05:53:23,1,0,86.969343,bancos_filiais
6,39.984674,116.31981,2008-10-23 05:53:28,1,0,80.938365,bancos_filiais
7,39.984623,116.319773,2008-10-23 05:53:33,1,0,74.520547,bancos_filiais
8,39.984606,116.319732,2008-10-23 05:53:38,1,0,70.901768,bancos_filiais
9,39.984555,116.319728,2008-10-23 05:53:43,1,0,66.217975,bancos_filiais


In [95]:
#Checking the amount of each point assigned to each type of poi
bancos_filiais = move_df.loc[move_df['name_poi'] == 'bancos_filiais']
bancos_agencias = move_df.loc[move_df['name_poi'] == 'bancos_agencias']
bancos_postos = move_df.loc[move_df['name_poi'] == 'bancos_postos']
bancos_PAE = move_df.loc[move_df['name_poi'] == 'bancos_PA']
bank = move_df.loc[move_df['name_poi'] == 'bank']

print("Number of points close to each bank definition")
print("bancos_filiais: ", bancos_filiais.shape[0])
print("bancos_agencias: ", bancos_agencias.shape[0])
print("bancos_postos: ", bancos_postos.shape[0])
print("bancos_PAE: ", bancos_PAE.shape[0])
print("bank: ", bank.shape[0])

Quantidades de pontos próximos de cada definição de banco
bancos_filiais:  61325
bancos_agencias:  63319
bancos_postos:  30100
bancos_PAE:  0
bank:  5505


In [97]:
#Finally, the Union
it.union_poi_bank(move_df, label_poi="name_poi")

#Result
move_df.head()

union bank categories to one category
... There are 5 -- name_poi


Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,0,0.0,banks
1,39.984198,116.319322,2008-10-23 05:53:06,1,0,13.690153,banks
2,39.984224,116.319402,2008-10-23 05:53:11,1,0,20.223428,banks
3,39.984211,116.319389,2008-10-23 05:53:16,1,0,18.416895,banks
4,39.984217,116.319422,2008-10-23 05:53:21,1,0,20.933073,banks


In [99]:
#Checking
move_df.loc[move_df['name_poi'] == 'banks'].shape[0]

217653

#### Union of Bus Stations

Converts "transit_station" and "bus_points" POIs to a single type: "bus_station"

In [101]:
move_df = MoveDataFrame(data=df, latitude="lat", longitude="lon", datetime="datetime")


#We create POIs with different name_poi that describe different types of bus stops to test
indexes_bp = np.linspace(0, move_df.shape[0], 6)
bus_pois = move_df[move_df.index.isin(indexes_bp)].copy()
bus_pois['id'] = [0,1,2,3,4]
bus_pois['name_poi'] = ['transit_station', 'transit_station', 'pontos_de_onibus', 'transit_station', 'pontos_de_onibus']

#Result
bus_pois.head()

Unnamed: 0,lat,lon,datetime,id,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,0,transit_station
43530,40.013694,116.311779,2008-11-08 04:06:39,1,transit_station
87061,39.966366,116.355678,2008-12-09 01:53:08,2,pontos_de_onibus
130591,40.004587,116.313867,2008-11-01 07:02:35,3,transit_station
174122,22.162494,113.553571,2008-11-30 06:12:41,4,pontos_de_onibus


In [104]:
#Integration
it.join_with_pois(move_df, bus_pois, label_id='id', label_poi_name='name_poi')

Integration with POIs...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

Integration with POI was finalized


In [105]:
#Result
move_df.head()

Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,0,0.0,transit_station
1,39.984198,116.319322,2008-10-23 05:53:06,1,0,13.690153,transit_station
2,39.984224,116.319402,2008-10-23 05:53:11,1,0,20.223428,transit_station
3,39.984211,116.319389,2008-10-23 05:53:16,1,0,18.416895,transit_station
4,39.984217,116.319422,2008-10-23 05:53:21,1,0,20.933073,transit_station


In [106]:
transit_station = move_df.loc[move_df['name_poi'] == 'transit_station']
pontos_de_onibus = move_df.loc[move_df['name_poi'] == 'pontos_de_onibus']

print("Number of points near transit_station's: ", transit_station.shape[0])
print("Number of points close to pontos_de_onibus's: ", pontos_de_onibus.shape[0])

Quantidade de pontos proximos a transit_station's:  182048
Quantidade de pontos proximos a pontos_de_onibus's:  35605


In [107]:
#The union function
it.union_poi_bus_station(move_df, label_poi="name_poi")

move_df.head()

union bus station categories to one category


Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,0,0.0,bus_station
1,39.984198,116.319322,2008-10-23 05:53:06,1,0,13.690153,bus_station
2,39.984224,116.319402,2008-10-23 05:53:11,1,0,20.223428,bus_station
3,39.984211,116.319389,2008-10-23 05:53:16,1,0,18.416895,bus_station
4,39.984217,116.319422,2008-10-23 05:53:21,1,0,20.933073,bus_station


In [108]:
#Checking

move_df.loc[move_df['name_poi'] == 'bus_station'].shape[0]

217653

#### Union of Bars and Restaurants

Converts "bar" and "restaurant" POIs to a single type: "bar-restaurant"

In [109]:
move_df = MoveDataFrame(data=df, latitude="lat", longitude="lon", datetime="datetime")

#We create POIs with both types
indexes_br = np.linspace(0, move_df.shape[0], 5)
br_POIs = move_df[move_df.index.isin(indexes_br)].copy()
br_POIs['name_poi'] = ['bar','restaurant','restaurant', 'bar']

#Result
br_POIs.head()

Unnamed: 0,lat,lon,datetime,id,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,bar
54413,39.981381,116.327413,2008-11-14 13:27:42,1,restaurant
108826,39.979454,116.327098,2008-10-24 08:42:36,5,restaurant
163239,39.977242,116.337733,2008-11-24 13:20:39,5,bar


In [111]:
#Integration
it.join_with_pois(move_df, br_POIs, label_id='id', label_poi_name='name_poi')

Integration with POIs...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

Integration with POI was finalized


In [112]:
#Result
move_df.head()

Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,1,0.0,bar
1,39.984198,116.319322,2008-10-23 05:53:06,1,1,13.690153,bar
2,39.984224,116.319402,2008-10-23 05:53:11,1,1,20.223428,bar
3,39.984211,116.319389,2008-10-23 05:53:16,1,1,18.416895,bar
4,39.984217,116.319422,2008-10-23 05:53:21,1,1,20.933073,bar


In [113]:
#Number of points close to each type
bar = move_df.loc[move_df['name_poi'] == 'bar']
restaurant = move_df.loc[move_df['name_poi'] == 'restaurant']

print("Closest type points 'bar': ", bar.shape[0])
print("Closest type points 'restaurant': ", restaurant.shape[0])

Pontos mais próximos do tipo 'bar':  188141
Pontos mais próximos do tipo 'restaurant':  29512


In [114]:
#Union of the two types of POIs into a single
it.union_poi_bar_restaurant(move_df, label_poi="name_poi")

#Result
move_df.head()

union restaurant and bar categories to one category


Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,1,0.0,bar-restaurant
1,39.984198,116.319322,2008-10-23 05:53:06,1,1,13.690153,bar-restaurant
2,39.984224,116.319402,2008-10-23 05:53:11,1,1,20.223428,bar-restaurant
3,39.984211,116.319389,2008-10-23 05:53:16,1,1,18.416895,bar-restaurant
4,39.984217,116.319422,2008-10-23 05:53:21,1,1,20.933073,bar-restaurant


In [115]:
#Checking
move_df.loc[move_df['name_poi'] == 'bar-restaurant'].shape[0]

217653

#### Union of Parks

Converts "pracas_e_parques" and "park" POIs to a single type: "parks"

In [116]:
move_df = MoveDataFrame(data=df, latitude="lat", longitude="lon", datetime="datetime")

#We create POIs with both types
indexes_p = np.linspace(0, move_df.shape[0], 5)
p_POIs = move_df[move_df.index.isin(indexes_p)].copy()
p_POIs['name_poi'] = ['pracas_e_parques','pracas_e_parques','park', 'park']

#Result
p_POIs.head()

Unnamed: 0,lat,lon,datetime,id,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,pracas_e_parques
54413,39.981381,116.327413,2008-11-14 13:27:42,1,pracas_e_parques
108826,39.979454,116.327098,2008-10-24 08:42:36,5,park
163239,39.977242,116.337733,2008-11-24 13:20:39,5,park


In [119]:
#Integration
it.join_with_pois(move_df, p_POIs, label_id='id', label_poi_name='name_poi')

#Result
move_df.head()

Integration with POIs...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

Integration with POI was finalized


Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,1,0.0,pracas_e_parques
1,39.984198,116.319322,2008-10-23 05:53:06,1,1,13.690153,pracas_e_parques
2,39.984224,116.319402,2008-10-23 05:53:11,1,1,20.223428,pracas_e_parques
3,39.984211,116.319389,2008-10-23 05:53:16,1,1,18.416895,pracas_e_parques
4,39.984217,116.319422,2008-10-23 05:53:21,1,1,20.933073,pracas_e_parques


In [120]:
#Number of points close to each type of POI
pracas_e_parques = move_df.loc[move_df['name_poi'] == 'pracas_e_parques']
park = move_df.loc[move_df['name_poi'] == 'park']

print("Number of points closest to pracas_e_parques: ", pracas_e_parques.shape[0])
print("Number of points closest to park: ", park.shape[0])

Quantidade de pontos mais próximos de pracas_e_parques:  156788
Quantidade de pontos mais próximos de park:  60865


In [122]:
#Union function
it.union_poi_parks(move_df, label_poi="name_poi")

move_df.head()

union parks categories to one category


Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,1,0.0,parks
1,39.984198,116.319322,2008-10-23 05:53:06,1,1,13.690153,parks
2,39.984224,116.319402,2008-10-23 05:53:11,1,1,20.223428,parks
3,39.984211,116.319389,2008-10-23 05:53:16,1,1,18.416895,parks
4,39.984217,116.319422,2008-10-23 05:53:21,1,1,20.933073,parks


In [123]:
#Checking the new quantity
move_df.loc[move_df['name_poi'] == 'parks'].shape[0]

217653

#### Union of police points

In [124]:
move_df = MoveDataFrame(data=df, latitude="lat", longitude="lon", datetime="datetime")

#We create POIs with both types
indexes_pol = np.linspace(0, move_df.shape[0], 5)
pol_POIs = move_df[move_df.index.isin(indexes_pol)].copy()
pol_POIs['name_poi'] = ['distritos_policiais','police','distritos_policiais', 'distritos_policiais']

#Result
pol_POIs.head()

Unnamed: 0,lat,lon,datetime,id,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,distritos_policiais
54413,39.981381,116.327413,2008-11-14 13:27:42,1,police
108826,39.979454,116.327098,2008-10-24 08:42:36,5,distritos_policiais
163239,39.977242,116.337733,2008-11-24 13:20:39,5,distritos_policiais


In [126]:
#Integration
it.join_with_pois(move_df, pol_POIs, label_id='id', label_poi_name='name_poi')

move_df.head()

Integration with POIs...
... Resetting index to operation...


VBox(children=(HTML(value=''), IntProgress(value=0, max=217653)))

Integration with POI was finalized


Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,1,0.0,distritos_policiais
1,39.984198,116.319322,2008-10-23 05:53:06,1,1,13.690153,distritos_policiais
2,39.984224,116.319402,2008-10-23 05:53:11,1,1,20.223428,distritos_policiais
3,39.984211,116.319389,2008-10-23 05:53:16,1,1,18.416895,distritos_policiais
4,39.984217,116.319422,2008-10-23 05:53:21,1,1,20.933073,distritos_policiais


In [127]:
#Quantity of points closest to each type of point
distritos_policiais = move_df.loc[move_df['name_poi'] == 'distritos_policiais']

print("Number of points closest to distritos_policiais: ", distritos_policiais.shape[0])

Quantidade de pontos mais próximos de distritos_policiais:  200962


In [128]:
#Union funcion
it.union_poi_police(move_df, label_poi="name_poi")

union distritos policies and police categories


In [129]:
#Result
move_df.head()

Unnamed: 0,lat,lon,datetime,id,id_poi,dist_poi,name_poi
0,39.984094,116.319236,2008-10-23 05:53:05,1,1,0.0,police
1,39.984198,116.319322,2008-10-23 05:53:06,1,1,13.690153,police
2,39.984224,116.319402,2008-10-23 05:53:11,1,1,20.223428,police
3,39.984211,116.319389,2008-10-23 05:53:16,1,1,18.416895,police
4,39.984217,116.319422,2008-10-23 05:53:21,1,1,20.933073,police


In [130]:
#Checking
move_df.loc[move_df['name_poi'] == 'police'].shape[0]

217653

## 12. Integração entre trajetórias e áreas coletivas

Both trajectories and collective areas need to be GeoPandas DataFrames.

In [131]:
conda install geopandas

Collecting package metadata (current_repodata.json): ...working... done
Note: you may need to restart the kernel to use updated packages.




  current version: 4.8.3
  latest version: 4.8.4

Please update conda by running

    $ conda update -n base -c defaults conda





Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\mauri\.conda\envs\geo1

  added / updated specs:
    - geopandas


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopandas-0.8.1            |             py_0         925 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         925 KB

The following packages will be UPDATED:

  geopandas                 pkgs/main::geopandas-0.6.1-py_0 --> conda-forge::geopandas-0.8.1-py_0



Downloading and Extracting Packages

geopandas-0.8.1      | 925 KB    |            |   0% 
geopandas-0.8.1      | 925 KB    | 1          |   2% 
geopandas-0.8.1      | 925 KB    | ##5        |  26% 
geopandas-0.8.1      | 925 KB    | #######    |  71% 
geopandas-0.8.1      | 925 KB    | ########## | 100% 
Preparing transaction: ...working...

In [135]:
import geopandas

move_df = df[0:2000].copy()

gdf = geopandas.GeoDataFrame(move_df, geometry=geopandas.points_from_xy(move_df.lon, move_df.lat))
gdf.head()

Unnamed: 0,lat,lon,datetime,id,geometry
0,39.984094,116.319236,2008-10-23 05:53:05,1,POINT (116.31924 39.98409)
1,39.984198,116.319322,2008-10-23 05:53:06,1,POINT (116.31932 39.98420)
2,39.984224,116.319402,2008-10-23 05:53:11,1,POINT (116.31940 39.98422)
3,39.984211,116.319389,2008-10-23 05:53:16,1,POINT (116.31939 39.98421)
4,39.984217,116.319422,2008-10-23 05:53:21,1,POINT (116.31942 39.98422)


In [136]:
#Creating collective areas
indexes_ac = np.linspace(0, gdf.shape[0], 5)
area_c = move_df[move_df.index.isin(indexes_ac)].copy()
area_c

Unnamed: 0,lat,lon,datetime,id,geometry
0,39.984094,116.319236,2008-10-23 05:53:05,1,POINT (116.31924 39.98409)
500,40.006436,116.317701,2008-10-23 10:53:31,1,POINT (116.31770 40.00644)
1000,40.014125,116.306159,2008-10-23 23:43:56,1,POINT (116.30616 40.01412)
1500,39.979009,116.326873,2008-10-24 00:11:29,1,POINT (116.32687 39.97901)


In [138]:
#Integration
it.join_collective_areas(gdf, area_c)

Integration between trajectories and collectives areas


VBox(children=(HTML(value=''), IntProgress(value=0, max=4)))

In [139]:
gdf.head()

Unnamed: 0,lat,lon,datetime,id,geometry,violating
0,39.984094,116.319236,2008-10-23 05:53:05,1,POINT (116.31924 39.98409),True
1,39.984198,116.319322,2008-10-23 05:53:06,1,POINT (116.31932 39.98420),False
2,39.984224,116.319402,2008-10-23 05:53:11,1,POINT (116.31940 39.98422),False
3,39.984211,116.319389,2008-10-23 05:53:16,1,POINT (116.31939 39.98421),False
4,39.984217,116.319422,2008-10-23 05:53:21,1,POINT (116.31942 39.98422),False


### Viewing points on the map

#### Collective Area

In [140]:
folium.plot_markers(area_c)

#### Corresponding points

In [142]:
folium.plot_markers(gdf.loc[gdf['violating'] == True])