# Imports

In [1]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import LineString
from tqdm.contrib.concurrent import process_map

# Data Loading

In [2]:
# Load the trip table from the parquet file; the path is relative to the
# directory the notebook is run from.
df_porto_taxi = pd.read_parquet(path="data/porto_taxi.parquet")

In [3]:
# Preview the first rows (head() defaults to five) to sanity-check the columns
# and the raw format of the `geometry` values before converting them.
df_porto_taxi.head()

Unnamed: 0,trip_id,call_type,origin_call,origin_stand,taxi_id,timestamp,day_type,geometry,travel_time_seconds
0,1372636858620000589,C,,,20000589,1372636858,A,"[[-8.618643, 41.141412], [-8.618499, 41.141376...",330
1,1372637303620000596,B,,7.0,20000596,1372637303,A,"[[-8.639847, 41.159826], [-8.640351, 41.159871...",270
2,1372636951620000320,C,,,20000320,1372636951,A,"[[-8.612964, 41.140359], [-8.613378, 41.14035]...",960
3,1372636854620000520,C,,,20000520,1372636854,A,"[[-8.574678, 41.151951], [-8.574705, 41.151942...",630
4,1372637091620000337,C,,,20000337,1372637091,A,"[[-8.645994, 41.18049], [-8.645949, 41.180517]...",420


In [4]:
# Print the column dtypes, non-null counts and memory usage of the loaded frame.
df_porto_taxi.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1666758 entries, 0 to 1710669
Data columns (total 9 columns):
 #   Column               Non-Null Count    Dtype  
---  ------               --------------    -----  
 0   trip_id              1666758 non-null  int64  
 1   call_type            1666758 non-null  object 
 2   origin_call          362169 non-null   float64
 3   origin_stand         796267 non-null   float64
 4   taxi_id              1666758 non-null  int64  
 5   timestamp            1666758 non-null  int64  
 6   day_type             1666758 non-null  object 
 7   geometry             1666758 non-null  object 
 8   travel_time_seconds  1666758 non-null  int64  
dtypes: float64(2), int64(4), object(3)
memory usage: 127.2+ MB


In [5]:
# Turn every raw coordinate sequence in `geometry` into a shapely LineString.
# process_map spreads the constructor calls over a pool of worker processes and
# shows a tqdm progress bar; the resulting list is written back positionally.
# NOTE(review): this overwrites the raw `geometry` column in place, and the
# worker count / chunk size are hard-coded -- tune them for the host machine.
df_porto_taxi["geometry"] = process_map(
    LineString,
    df_porto_taxi["geometry"],
    max_workers=20,
    chunksize=1000,
)

  0%|          | 0/1666758 [00:00<?, ?it/s]

In [6]:
# Wrap the frame as a GeoDataFrame, using the LineString column as the active
# geometry and tagging it with EPSG:4326.
# NOTE(review): the previewed coordinates look like (lon, lat) pairs -- confirm
# that matches the axis order expected for this CRS.
gdf_porto_taxi = gpd.GeoDataFrame(
    df_porto_taxi,
    geometry="geometry",
    crs="EPSG:4326",
)

# Data Visualization

In [7]:
# Interactive map of the first five trips only, so the preview stays light.
gdf_porto_taxi.head().explore(
    # Styling: colour each trip by its call type on a light basemap.
    column="call_type",
    cmap="Set1",
    tiles="CartoDB positron",
    # Interaction: call type on hover, every attribute on click.
    tooltip="call_type",
    popup=True,
)