In [1]:
import os
import json
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Step one directory up when the current working directory ends with "Topic1".
# NOTE(review): presumably this makes the relative "data/..." paths used by the
# cells below resolve from the repository root — confirm against the repo layout.
if os.getcwd().endswith("Topic1"):
    os.chdir("..")
    # Report the new working directory so the reader can see where paths resolve.
    print(f"Changed to {os.getcwd()}")

Changed to c:\Users\toell\Documents\GitHub\Design-of-AI-driven-business-operation


In [2]:
# Column names shared by the helper functions in this notebook.

# Demographics table: column that is summed to get inhabitant counts.
demographics_population_column = "a_inw"
# Demographics table: column holding the buurt code used to match against the travel-time table.
demographics_buurt_code_column = "gwb_code"
# Travel-time ("punt") table: column fed into the willingness-to-cycle model.
punt_travel_time_column = 'reistijd_min'
# Travel-time ("punt") table: column holding the buurt code.
punt_buurt_code_column = 'bu_code'
# Name of the column added by add_willingness_to_cycle_column. Despite the
# "percentage" suffix it stores the raw model output 1 / (1 + exp(...)), not a value scaled by 100.
willingness_to_cycle_column = 'willingness_to_cycle_percentage'

In [3]:
def willingness_to_cycle(tij, location):
    """Evaluate the willingness-to-cycle curve for one location type.

    The parameter pair ``(a, b)`` for ``location`` is read from
    ``data/cycle_willingness/fiets.json`` on every call and plugged into
    ``1 / (1 + exp(a + b * ln(tij + 1e-10)))``.

    Args:
        tij: Travel time(s); a scalar or anything numpy can broadcast over
            (the notebook passes a pandas Series).
        location (str): Key into the JSON parameter file.

    Returns:
        The model output, with the same shape as ``tij``.

    Raises:
        ValueError: If ``location`` is not a key of the parameter file.
    """
    with open("data/cycle_willingness/fiets.json", "r") as params_file:
        params_by_location = json.load(params_file)

    if location not in params_by_location:
        raise ValueError("Unknown location")

    intercept, slope = params_by_location[location]
    # The tiny offset keeps the logarithm finite when the travel time is exactly 0.
    log_travel_time = np.log(tij + 1e-10)
    return 1 / (1 + np.exp(intercept + slope * log_travel_time))

In [28]:
def filter_by_time(df, max_time):
    """Keep, per buurt code, the fastest row whose travel time is at most ``max_time``.

    The travel-time and buurt-code columns are taken from the module-level
    ``punt_travel_time_column`` and ``punt_buurt_code_column`` settings, so the
    function keeps working when those settings are changed.

    Args:
        df (pd.DataFrame): Travel-time table containing both configured columns.
        max_time: Inclusive upper bound on the travel time.

    Returns:
        pd.DataFrame: A new frame sorted by ascending travel time, with the buurt
        codes cast to ``str`` and exactly one row per buurt code (the row with the
        smallest travel time). ``df`` itself is left untouched.
    """
    within_limit = df[df[punt_travel_time_column] <= max_time]
    return (
        within_limit
        # assign() works on a copy, so no value is written into a slice of ``df``.
        .assign(**{punt_buurt_code_column: within_limit[punt_buurt_code_column].astype(str)})
        .sort_values(by=punt_travel_time_column, ascending=True)
        # After the sort the first occurrence of a code is its fastest row.
        .drop_duplicates(subset=[punt_buurt_code_column], keep="first")
    )

In [5]:
def load_demograhics():
    """Load the 2023 demographics table, using a CSV copy as a cache.

    If ``data/demographics/kwb-2023.csv`` exists it is read directly. Otherwise
    the Excel workbook ``data/demographics/kwb-2023.xlsx`` is read and written
    out as that CSV (``;``-separated, without the index) for later calls.
    A short message about the path taken is printed in both cases.

    Returns:
        pd.DataFrame: The demographics table.
    """
    csv_cache_path = "data/demographics/kwb-2023.csv"

    if not os.path.exists(csv_cache_path):
        # First run: read the workbook, then create the CSV cache.
        demographics = pd.read_excel("data/demographics/kwb-2023.xlsx", )
        demographics.to_csv(csv_cache_path, sep=";", index=False)
        print("Loaded demographics from Excel and saved to CSV")
        return demographics

    demographics = pd.read_csv(csv_cache_path, sep=";", low_memory=False)
    print("Loaded demographics from CSV")
    return demographics

In [6]:
def get_buurt_ids(df):
    """Return the buurt codes of ``df`` as a flat list of strings.

    Args:
        df (pd.DataFrame): Frame containing the ``punt_buurt_code_column`` column.

    Returns:
        list[str]: One entry per row, in row order, each cast to ``str``.
    """
    codes_as_text = df[[punt_buurt_code_column]].astype(str)
    return codes_as_text.to_numpy().reshape(-1).tolist()

In [16]:
def get_total_inhabitants_in_buurts(df, within_mins, df_demographics=None):
    """Sum the inhabitants of every buurt reachable within ``within_mins``.

    Args:
        df (pd.DataFrame): Travel-time table, passed to ``filter_by_time``.
        within_mins: Maximum travel time, passed to ``filter_by_time``.
        df_demographics (pd.DataFrame | None): Demographics table; loaded with
            ``load_demograhics()`` when omitted.

    Returns:
        The sum of ``demographics_population_column`` over all demographics rows
        whose (string-cast) buurt code appears in the filtered travel-time table.
    """
    reachable = filter_by_time(df, within_mins)
    if df_demographics is None:
        df_demographics = load_demograhics()

    reachable_codes = get_buurt_ids(reachable)
    # Compare codes as strings, matching what get_buurt_ids returns.
    demographics_codes = df_demographics[demographics_buurt_code_column].astype(str)
    is_reachable = demographics_codes.isin(reachable_codes)
    return df_demographics[is_reachable][demographics_population_column].sum()

In [17]:
def add_willingness_to_cycle_column(df, location,):
    """Return a copy of ``df`` with the willingness-to-cycle column added.

    Args:
        df (pd.DataFrame): Frame containing the ``punt_travel_time_column`` column.
        location (str): Location key forwarded to ``willingness_to_cycle``.

    Returns:
        pd.DataFrame: A copy of ``df`` with ``willingness_to_cycle_column`` set to
        the model output for each row's travel time. ``df`` is not modified.
    """
    per_row_willingness = willingness_to_cycle(df[punt_travel_time_column], location)
    with_willingness = df.copy()
    with_willingness[willingness_to_cycle_column] = per_row_willingness
    return with_willingness

In [18]:
def get_total_willingness_to_cycle_in_buurts(df, location, within_mins, df_demographics=None, ):
    """Estimate how many inhabitants reachable within ``within_mins`` are willing to cycle.

    Each reachable buurt contributes ``willingness * population``. Willingness comes
    from ``add_willingness_to_cycle_column`` and population from the demographics
    table. The two tables are joined on the buurt code compared as strings, so the
    result does not depend on row order or on the dtype of the demographics codes.

    Args:
        df (pd.DataFrame): Travel-time table, passed to ``filter_by_time``.
        location (str): Location key forwarded to the willingness model.
        within_mins: Maximum travel time, passed to ``filter_by_time``.
        df_demographics (pd.DataFrame | None): Demographics table; loaded with
            ``load_demograhics()`` when omitted.

    Returns:
        int: Sum of ``willingness * population`` over the buurts present in both
        tables, truncated to an integer. Buurts missing from the demographics
        table (or with a missing population) contribute nothing.
    """
    df_demographics = load_demograhics() if df_demographics is None else df_demographics
    df_filtered = filter_by_time(df, within_mins)
    # Buurt codes (as strings) that are reachable within the time limit.
    buurt_ids = get_buurt_ids(df_filtered)
    df_filtered = add_willingness_to_cycle_column(df_filtered, location,)

    # Restrict the demographics to the reachable buurts, comparing codes as strings.
    demographics_codes = df_demographics[demographics_buurt_code_column].astype(str)
    in_reach = demographics_codes.isin(buurt_ids)
    df_location = df_demographics[in_reach]

    if len(df_location) != len(df_filtered):
        print(f"Demographics and filtered dataframes do not match in length: {len(df_location)} vs {len(df_filtered)}, ignoring missing values")

    # Population per string code. Rows sharing a code are summed, the same way
    # get_total_inhabitants_in_buurts counts every matching demographics row.
    population_by_buurt = (
        pd.DataFrame({
            "code": demographics_codes[in_reach].to_numpy(),
            "population": df_location[demographics_population_column].to_numpy(),
        })
        .groupby("code")["population"]
        .sum()
    )

    # Look up each reachable buurt's population by code instead of relying on
    # both tables having the same length and sort order; unmatched codes give NaN.
    filtered_codes = df_filtered[punt_buurt_code_column].astype(str).to_numpy()
    population = population_by_buurt.reindex(filtered_codes).to_numpy(dtype=float)
    willingness = df_filtered[willingness_to_cycle_column].to_numpy(dtype=float)

    # nansum skips the buurts without a usable population figure.
    total_willingness = int(np.nansum(willingness * population))
    return total_willingness

In [None]:

def get_total_inhabitants_and_willingness(punt1, mode, within_mins, location="Elementary Schools", verbose=False):
    """
    Compute reachable inhabitants and willing cyclists for one point-of-interest type.

    Loads the demographics table, reads the travel-time file
    ``data/02_punt_tot_punt_analyse/{punt1}_naar_buurt_{mode}.csv`` and hands both
    to ``get_total_inhabitants_in_buurts`` and
    ``get_total_willingness_to_cycle_in_buurts``.

    Args:
        punt1 (str): Point-of-interest type used in the file name; the other end
            of the trip is always "buurt".
        mode (str): Transport mode used in the file name, e.g. "fiets" or "ebike".
        within_mins (int): Maximum travel time in minutes.
        location (str): Location key for the willingness-to-cycle model.
        verbose (bool): If True, print both totals together with their shares.
    Returns:
        total_inhabitants: Inhabitants of the buurts within the time limit.
        total_willing_cyclists (int): Willing cyclists among those inhabitants.
    """
    punt2 = "buurt"
    df_demographics = load_demograhics()
    travel_times_path = f"data/02_punt_tot_punt_analyse/{punt1}_naar_{punt2}_{mode}.csv"
    df_punt = pd.read_csv(travel_times_path, sep=";")
    # The row labelled 0 is used as the national total.
    # NOTE(review): presumably the first row of the demographics file is the whole country — confirm.
    nl_total = df_demographics[demographics_population_column][0]

    shared_arguments = dict(within_mins=within_mins, df_demographics=df_demographics)
    total_inhabitants = get_total_inhabitants_in_buurts(df_punt, **shared_arguments)
    total_willing_cyclists = get_total_willingness_to_cycle_in_buurts(df_punt, location=location, **shared_arguments)

    if not verbose:
        return total_inhabitants, total_willing_cyclists

    print(f"Total inhabitants within {within_mins} minutes of {punt1} from {punt2}: {total_inhabitants} of {nl_total} = {total_inhabitants/nl_total:.2%} of the Netherlands")
    print(f"Total willingness to cycle of those: {total_willing_cyclists} of {total_inhabitants} = {total_willing_cyclists/total_inhabitants:.2%}")
    return total_inhabitants, total_willing_cyclists

In [33]:
# Parameters for one run; the trailing comments list the values used in this project.
# Maximum travel time in minutes.
within_mins = 5
# Point-of-interest type; selects the travel-time CSV that is read.
punt = "hbo_wo" # ibis, hbo_wo, mbo, basis, highschool, bus, trein
# Transport mode; also part of the travel-time CSV file name.
mode = "fiets" # fiets, ebike
# Location key for the willingness-to-cycle model parameters.
location = "Universities" # Elementary Schools, High Schools, Universities, Train - NS, Train - Local

# verbose=True prints both totals and their shares (see the cell output).
total, total_willing = get_total_inhabitants_and_willingness(punt1=punt, mode=mode, within_mins=within_mins, location=location, verbose=True)

Loaded demographics from CSV
Total inhabitants within 5 minutes of hbo_wo from buurt: 956480 of 17811291 = 5.37% of the Netherlands
Total willingness to cycle of those: 917525 of 956480 = 95.93%
