In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import shapely.geometry as geo

In [2]:
# Default input / output locations, relative to this notebook. They are
# replaced in the following cell when a `papermill` object is available.
survey_path = "../../../results/surveys/egt_2010/cleaned"
spatial_path = "../../../resources/surveys/egt_2010/spatial"

output_path = "../../../results/surveys/egt_2010/spatial.parquet"

In [3]:
# If a `papermill` object exists in the namespace (presumably injected by the
# pipeline that executes this notebook -- TODO confirm), take the input and
# output locations from it instead of the defaults.
if "papermill" in locals():
    survey_path, spatial_path = papermill.input["survey"], papermill.input["spatial"]
    output_path = papermill.output[0]

In [4]:
# Load spatial data: only the zone identifier and its geometry are kept
# (presumably the 100 m grid squares, judging by the file name)
df_spatial = gpd.read_file("{}/carr100m.shp".format(spatial_path))[["IDENT", "geometry"]]

# Declare the coordinates as EPSG:27561, whatever CRS was read from the file.
# set_crs(allow_override = True) states this override explicitly; assigning to
# `.crs` is deprecated in geopandas when a CRS is already attached.
df_spatial = df_spatial.set_crs("EPSG:27561", allow_override = True)

df_spatial = df_spatial.rename(columns = { "IDENT": "location_id" })

# Reproject to EPSG:2154, which is the CRS of the notebook's output
df_spatial = df_spatial.to_crs("EPSG:2154")

In [5]:
# Load trip data. Only the trip identifier and the two cell columns are used
# below, so the selection is handed to the parquet reader instead of loading
# the whole table and slicing it afterwards.
df_trips = pd.read_parquet("{}/trips.parquet".format(survey_path), columns = [
    "trip_id", "origin_cell", "destination_cell"])

In [6]:
# Merge in the cell geometry for the origin, then for the destination. Both
# joins are inner joins, so trips whose cell is not present in df_spatial are
# dropped.
for cell_column, geometry_column in (
    ("origin_cell", "origin_geometry"),
    ("destination_cell", "destination_geometry"),
):
    df_cells = df_spatial.rename(columns = {
        "location_id": cell_column, "geometry": geometry_column
    })

    df_trips = pd.merge(df_trips, df_cells, on = cell_column, how = "inner")

In [7]:
# Convert square to point: every cell geometry is replaced by its centroid
df_trips = gpd.GeoDataFrame(df_trips, geometry = "origin_geometry", crs = "EPSG:2154")

for column in ("origin_geometry", "destination_geometry"):
    # Make the column the active geometry first, then overwrite it
    df_trips = df_trips.set_geometry(column)
    df_trips[column] = df_trips[column].centroid

In [8]:
# Convert to line: one straight segment from the origin point to the
# destination point of each trip, stored in a new "geometry" column which
# then becomes the active geometry
df_trips = df_trips.assign(geometry = [
    geo.LineString([start, end])
    for start, end in zip(df_trips["origin_geometry"], df_trips["destination_geometry"])
]).set_geometry("geometry")

### Output

In [9]:
# Declare the CRS of the active geometry column before writing.
# set_crs(allow_override = True) works whether or not a CRS is already
# attached; assigning to `.crs` is deprecated in geopandas when one is.
df_trips = df_trips.set_crs("EPSG:2154", allow_override = True)
df_trips.to_parquet(output_path)