In [4]:
import pandas as pd
import geopandas as gpd

In [5]:
# Input/output locations. All three paths are relative, so they resolve
# against the working directory the notebook is run from.

# Directory holding the cleaned survey tables; persons.parquet and
# trips.parquet are read from here.
survey_path = "../../results/surveys/egt_2010/cleaned"
# Parquet file read with geopandas; supplies the origin/destination
# geometry columns for the trips.
spatial_path = "../../results/surveys/egt_2010/spatial.parquet"
# Destination of the final trips table written at the end of the notebook.
output_path = "../../results/road/survey.parquet"

In [6]:
# Load survey data
# Resolve the two tabular inputs inside the cleaned-survey directory first,
# so the files being read are visible at a glance.
persons_file = "{}/persons.parquet".format(survey_path)
trips_file = "{}/trips.parquet".format(survey_path)

df_persons = pd.read_parquet(persons_file)
df_trips = pd.read_parquet(trips_file)

# The spatial table carries geometry columns, hence the geopandas reader.
df_spatial = gpd.read_parquet(spatial_path)

In [7]:
# Prepare spatial data
# Flatten the origin and destination geometries into plain x/y columns
# (added in place to df_spatial) so they can be selected after the merge.
origin_points = df_spatial["origin_geometry"]
destination_points = df_spatial["destination_geometry"]

df_spatial["origin_x"] = origin_points.x
df_spatial["origin_y"] = origin_points.y
df_spatial["destination_x"] = destination_points.x
df_spatial["destination_y"] = destination_points.y

In [8]:
# Only keep valid car trips
is_car_trip = df_trips["mode"] == "car"
df_trips = df_trips[is_car_trip & df_trips["is_valid"]].copy()

# Merge in weight
# NOTE(review): no `on=` is given, so pandas joins on every column name the
# two frames share — presumably only "person_id"; confirm against the data.
# The default inner join also drops trips without a matching person.
person_weights = df_persons[["person_id", "weight"]]
df_trips = df_trips.merge(person_weights)

# Merge in spatial data
# NOTE(review): same implicit-key inner join as above; the shared columns
# between trips and the spatial table are not visible here — verify.
df_trips = df_trips.merge(df_spatial)

# Reduce to the columns that are written out and tag the survey travel time
# so its origin is explicit in the output table.
output_columns = [
    "trip_id",
    "origin_x", "origin_y",
    "destination_x", "destination_y",
    "travel_time", "departure_time", "weight"
]

df_trips = df_trips[output_columns].rename(
    columns = { "travel_time": "survey_travel_time_s" }
)

In [9]:
# Output
# Write the trips table to output_path in Parquet format.
# NOTE(review): the target directory is presumably expected to exist
# already — nothing in this notebook creates it; confirm.
df_trips.to_parquet(output_path)