In [1]:
import pandas as pd
import numpy as np

In [2]:
# Default locations used when the notebook is executed on its own. The following
# cell replaces both values if a `papermill` object is present in the namespace.

# Directory from which households.parquet, persons.parquet and trips.parquet are read
survey_path = "../../results/surveys/egt_2010/cleaned"
# Parquet file the per-person availability table is written to
output_path = "../../results/surveys/egt_2010/availabilities.parquet"

In [3]:
# If a `papermill` object exists in the namespace (presumably injected by the
# pipeline that runs this notebook - TODO confirm), its first input and first
# output take precedence over the default paths defined above.
if "papermill" in locals():
    survey_path, output_path = papermill.input[0], papermill.output[0]

In [4]:
# Read the three cleaned survey tables from the survey directory
df_households, df_persons, df_trips = (
    pd.read_parquet(f"{survey_path}/{table}.parquet")
    for table in ("households", "persons", "trips")
)

### Use-based availabilities
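First, identify the persons who actually use each mode (car, motorbike, bicycle) on at least one trip. Then determine whether their household owns more vehicles of that kind than it has such users. A mode counts as available to a person if they use it themselves or if their household has at least one unused vehicle.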

In [5]:
for mode in ("car", "motorbike", "bicycle"):
    count_column = "number_of_{}s".format(mode)
    flag_column = "{}_availability_by_use".format(mode)

    # Persons with at least one trip in this mode (one row per person)
    uses_mode = df_trips["mode"] == mode
    df_active = df_trips[uses_mode].drop_duplicates("person_id")[["household_id", "person_id"]]

    # Number of such persons per household, joined onto the household vehicle count.
    # Households without any user of the mode get NaN from the left merge, hence fillna(0).
    df_active_count = df_active.groupby("household_id").size().reset_index(name = "active")
    df_vehicles = pd.merge(df_households[["household_id", count_column]], df_active_count, how = "left")
    df_vehicles["active"] = df_vehicles["active"].fillna(0).astype(int)

    # Vehicles left over once every active user has taken one (never negative)
    df_vehicles["unused"] = np.maximum(0, df_vehicles[count_column] - df_vehicles["active"])

    # Share of households that own at least as many vehicles as they have active users
    print("Consistent", mode, np.mean(df_vehicles[count_column] >= df_vehicles["active"]))

    # Available if the person uses the mode, or if the household has a spare vehicle
    households_with_spare = df_vehicles.loc[df_vehicles["unused"] > 0, "household_id"]
    df_persons[flag_column] = (
        df_persons["person_id"].isin(df_active["person_id"])
        | df_persons["household_id"].isin(households_with_spare)
    )

Consistent car 0.9789721195834733
Consistent motorbike 0.9963050050386295
Consistent bicycle 0.9950285522337924


### Vehicle-based availabilities

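Compare the number of potential users in a household (holders of a driving permit for cars, holders of a motorbike permit for motorbikes, all household members for bicycles) with the number of vehicles the household owns. The availability is `all` if there are at least as many vehicles as potential users, `some` if there is at least one vehicle but not enough for every potential user (or no potential user at all), and `none` if the household owns no such vehicle.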

In [6]:
# Cars
#
# Classify every household by comparing its number of cars with the number of
# household members holding a driving permit:
#   "none": the household owns no car
#   "some": at least one car, but not one per permit holder
#   "all":  at least as many cars as permit holders
df_availability = df_households[["household_id", "number_of_cars"]]
df_availability = pd.merge(
    df_availability,
    df_persons[
        df_persons["has_driving_permit"]
    ].groupby("household_id").size().reset_index(name = "persons"),
    on = "household_id", how = "left")

# Households without any permit holder have NaN in "persons" after the left
# merge. The ">=" comparison is False for NaN, so those households are never
# promoted to "all" and stay "none" or "some".
df_availability["car_availability_by_vehicles"] = "none"
df_availability.loc[
    df_availability["number_of_cars"] > 0, "car_availability_by_vehicles"] = "some"
df_availability.loc[
    df_availability["number_of_cars"] >= df_availability["persons"], "car_availability_by_vehicles"] = "all"

# Drop a column left over from an earlier execution of this cell. Without this,
# re-running the cell makes the merge emit "_x" / "_y" suffixed duplicates and
# later cells selecting "car_availability_by_vehicles" fail.
df_persons = pd.merge(
    df_persons.drop(columns = ["car_availability_by_vehicles"], errors = "ignore"),
    df_availability[["household_id", "car_availability_by_vehicles"]], on = "household_id", how = "left")

In [7]:
# Number of persons for each combination of the two car availability indicators
df_persons[
    ["car_availability_by_vehicles", "car_availability_by_use"]
].value_counts().sort_index()

car_availability_by_vehicles  car_availability_by_use
all                           False                       4541
                              True                       15100
none                          False                       6949
                              True                          51
some                          False                       3260
                              True                        5274
Name: count, dtype: int64

In [8]:
# Motorbikes
#
# Classify every household by comparing its number of motorbikes with the
# number of household members holding a motorbike permit:
#   "none": the household owns no motorbike
#   "some": at least one motorbike, but not one per permit holder
#   "all":  at least as many motorbikes as permit holders
df_availability = df_households[["household_id", "number_of_motorbikes"]]
df_availability = pd.merge(
    df_availability,
    df_persons[
        df_persons["has_motorbike_permit"]
    ].groupby("household_id").size().reset_index(name = "persons"),
    on = "household_id", how = "left")

# Households without any permit holder have NaN in "persons" after the left
# merge. The ">=" comparison is False for NaN, so those households are never
# promoted to "all" and stay "none" or "some".
df_availability["motorbike_availability_by_vehicles"] = "none"
df_availability.loc[
    df_availability["number_of_motorbikes"] > 0, "motorbike_availability_by_vehicles"] = "some"
df_availability.loc[
    df_availability["number_of_motorbikes"] >= df_availability["persons"], "motorbike_availability_by_vehicles"] = "all"

# Drop a column left over from an earlier execution of this cell. Without this,
# re-running the cell makes the merge emit "_x" / "_y" suffixed duplicates and
# later cells selecting "motorbike_availability_by_vehicles" fail.
df_persons = pd.merge(
    df_persons.drop(columns = ["motorbike_availability_by_vehicles"], errors = "ignore"),
    df_availability[["household_id", "motorbike_availability_by_vehicles"]], on = "household_id", how = "left")

In [9]:
# Number of persons for each combination of the two motorbike availability indicators
df_persons[
    ["motorbike_availability_by_vehicles", "motorbike_availability_by_use"]
].value_counts().sort_index()

motorbike_availability_by_vehicles  motorbike_availability_by_use
all                                 False                              125
                                    True                               884
none                                False                            31376
                                    True                                21
some                                False                              624
                                    True                              2145
Name: count, dtype: int64

In [10]:
# Bicycles
#
# No permit is required, so every household member counts as a potential user.
# Classify every household by comparing its number of bicycles with its size:
#   "none": the household owns no bicycle
#   "some": at least one bicycle, but not one per household member
#   "all":  at least as many bicycles as household members
df_availability = df_households[["household_id", "number_of_bicycles"]]
df_availability = pd.merge(
    df_availability,
    df_persons.groupby("household_id").size().reset_index(name = "persons"),
    on = "household_id", how = "left")

# A household without any row in df_persons would have NaN in "persons" after
# the left merge. The ">=" comparison is False for NaN, so such a household is
# never promoted to "all".
df_availability["bicycle_availability_by_vehicles"] = "none"
df_availability.loc[
    df_availability["number_of_bicycles"] > 0, "bicycle_availability_by_vehicles"] = "some"
df_availability.loc[
    df_availability["number_of_bicycles"] >= df_availability["persons"], "bicycle_availability_by_vehicles"] = "all"

# Drop a column left over from an earlier execution of this cell. Without this,
# re-running the cell makes the merge emit "_x" / "_y" suffixed duplicates and
# later cells selecting "bicycle_availability_by_vehicles" fail.
df_persons = pd.merge(
    df_persons.drop(columns = ["bicycle_availability_by_vehicles"], errors = "ignore"),
    df_availability[["household_id", "bicycle_availability_by_vehicles"]], on = "household_id", how = "left")

In [11]:
# Number of persons for each combination of the two bicycle availability indicators
df_persons[
    ["bicycle_availability_by_vehicles", "bicycle_availability_by_use"]
].value_counts().sort_index()

bicycle_availability_by_vehicles  bicycle_availability_by_use
all                               True                            9263
none                              False                          14306
                                  True                              74
some                              False                            187
                                  True                           11345
Name: count, dtype: int64

### Output

In [12]:
# Person identifier followed by the use-based and vehicle-based indicator of each mode
output_columns = ["person_id"]
output_columns += ["car_availability_by_use", "car_availability_by_vehicles"]
output_columns += ["motorbike_availability_by_use", "motorbike_availability_by_vehicles"]
output_columns += ["bicycle_availability_by_use", "bicycle_availability_by_vehicles"]

# Write only these columns to the configured output file
df_persons[output_columns].to_parquet(output_path)