In [3]:
import json
from pathlib import Path
from time import sleep

import geopandas as gpd
import numpy as np
import pandas as pd
import requests
from pyproj import Proj, transform, CRS
from tqdm import tqdm

# Retrieve Distance and Time from OSM

## Current Approach

In [4]:
# Population/building layer; the 'Alter' and 'Geschlecht' columns are stored as
# JSON text in the GeoPackage and are decoded back into Python objects here.
bevölkerungs_gdf = gpd.read_file('./WLP/pharmacy_assigned.gpkg')
bevölkerungs_gdf['Alter'] = bevölkerungs_gdf['Alter'].map(json.loads)
bevölkerungs_gdf['Geschlecht'] = bevölkerungs_gdf['Geschlecht'].map(json.loads)

# Pharmacy table: build point geometries from the lon/lat columns (EPSG:4326),
# then reproject them into the CRS of the population layer.
pharmacy_df = pd.read_csv('./OSM_Data/Würzburg-Apotheken.csv')
pharmacy_gdf = gpd.GeoDataFrame(
    pharmacy_df,
    geometry=gpd.points_from_xy(x=pharmacy_df['lon'], y=pharmacy_df['lat']),
    crs=CRS("EPSG:4326"),
).to_crs(bevölkerungs_gdf.crs)

In [5]:
def calculate_distance_to_nearest_pharmacy(row, max_retries=3, retry_delay=30, timeout=30):
    """Look up the driving route from a building to its assigned pharmacy via OSRM.

    The pharmacy is taken from the module-level ``pharmacy_gdf`` by matching its
    ``id`` column against ``row.assigned_pharmacy``. The route is requested from
    the public OSRM demo server using the ``lon``/``lat`` values of both ends.

    Parameters
    ----------
    row : object with ``lon``, ``lat`` and ``assigned_pharmacy`` attributes
        Typically one row of the population GeoDataFrame.
    max_retries : int, default 3
        Maximum number of request attempts.
    retry_delay : float, default 30
        Seconds to wait between two attempts.
    timeout : float, default 30
        Timeout in seconds passed to ``requests.get`` so that a stalled
        connection cannot block the notebook indefinitely.

    Returns
    -------
    tuple
        ``(distance, duration)`` of the first leg of the first returned route
        (metres and seconds according to the OSRM API documentation), or
        ``(0, 0)`` if the pharmacy is unknown or every attempt failed.
    """
    nearest_pharmacy = pharmacy_gdf[pharmacy_gdf['id'] == row.assigned_pharmacy]

    lon_build = row.lon
    lat_build = row.lat

    # An unknown pharmacy id would otherwise raise IndexError on .iloc[0] and
    # abort a long-running loop; report it and use the failure value instead.
    if nearest_pharmacy.empty:
        print(f"No pharmacy with id {row.assigned_pharmacy} found for {lon_build},{lat_build}")
        return 0, 0

    lon_pharm = nearest_pharmacy.lon.iloc[0]
    lat_pharm = nearest_pharmacy.lat.iloc[0]

    # The URL does not change between attempts, so build it once.
    request_string = f"http://router.project-osrm.org/route/v1/driving/{lon_build},{lat_build};{lon_pharm},{lat_pharm}?overview=false"

    for attempt in range(1, max_retries + 1):  # Loop for retries
        try:
            res = requests.get(request_string, timeout=timeout)
            res.raise_for_status()  # Raise an exception for non-200 status codes

            content = json.loads(res.content)
            leg = content['routes'][0]['legs'][0]
            return leg['distance'], leg['duration']

        # Besides transport/HTTP errors, also treat an undecodable body
        # (ValueError) or a response without the expected route structure
        # (KeyError/IndexError/TypeError) as a failed attempt.
        except (requests.exceptions.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
            print(f"Error occurred on attempt {attempt} for {lon_build},{lat_build} -> {lon_pharm},{lat_pharm}: {e}")
            # Waiting after the last attempt would only delay the failure result.
            if attempt < max_retries:
                sleep(retry_delay)

    # If all retries fail, return 0 for distance and time
    print(f"All retries failed for {lon_build},{lat_build} -> {lon_pharm},{lat_pharm}")
    return 0, 0

In [None]:
# Split the population layer into chunks so that intermediate results are saved
# regularly and an interrupted run can be resumed from `start_chunk`.
N_CHUNKS = 10
# Guard against a zero chunk size when there are fewer rows than N_CHUNKS.
chunksize = max(1, len(bevölkerungs_gdf) // N_CHUNKS)
# Stepping over the full length keeps the boundaries of the first N_CHUNKS chunks
# unchanged and adds one final, shorter chunk for the remainder rows, which were
# silently dropped when exactly N_CHUNKS slices were taken.
chunks = [
    bevölkerungs_gdf.iloc[start:start + chunksize]
    for start in range(0, len(bevölkerungs_gdf), chunksize)
]
start_chunk = 0  # index of the first chunk to process (raise it to resume a run)

# Create the output folder up front so that the save cannot fail after a long run.
output_dir = Path('./Processed_WLP/Würzburg')
output_dir.mkdir(parents=True, exist_ok=True)

for chunk_number, chunk_slice in enumerate(chunks[start_chunk:], start=start_chunk):
    # Work on an independent copy: writing into an iloc slice of the parent frame
    # triggers SettingWithCopyWarning and has ambiguous semantics.
    chunk = chunk_slice.copy()

    # Calculate distance and time for each row in the chunk. Results are collected
    # positionally and assigned once, which also stays correct if the index
    # contains duplicate labels.
    distances = []
    durations = []
    for _, row in tqdm(chunk.iterrows(), total=len(chunk)):
        distance, duration = calculate_distance_to_nearest_pharmacy(row)
        distances.append(distance)
        durations.append(duration)
    chunk['distance_pharmacy'] = distances
    chunk['time_pharmacy'] = durations

    # Serialise the decoded JSON columns back to text before writing the chunk
    chunk['Alter'] = chunk['Alter'].apply(json.dumps)
    chunk['Geschlecht'] = chunk['Geschlecht'].apply(json.dumps)
    chunk.to_file(str(output_dir / f'pharmacy_assigned_{chunk_number}.gpkg'), driver='GPKG')

print("All chunks processed and saved successfully!")

In [7]:
# Collect the per-chunk result files. They are discovered on disk instead of
# assuming a fixed count, because the number of chunk files depends on how the
# data was split.
processed_dir = Path('./Processed_WLP/Würzburg')
chunk_files = {}
for chunk_path in processed_dir.glob('pharmacy_assigned_*.gpkg'):
    chunk_suffix = chunk_path.stem.rsplit('_', 1)[-1]
    if chunk_suffix.isdigit():  # skip files that do not end in a chunk number
        chunk_files[int(chunk_suffix)] = chunk_path

if not chunk_files:
    raise FileNotFoundError(f"No chunk files found in {processed_dir}")

# A gap in the numbering means a chunk was never written; merging anyway would
# silently produce an incomplete data set.
missing_chunks = sorted(set(range(max(chunk_files) + 1)) - set(chunk_files))
if missing_chunks:
    raise FileNotFoundError(f"Missing chunk files for chunk numbers: {missing_chunks}")

# Sort numerically so that chunk 10 follows chunk 9 rather than chunk 1.
file_paths = [str(chunk_files[number]) for number in sorted(chunk_files)]

# Concatenate all chunks; ignore_index avoids repeated index labels, since each
# file is read with its own 0-based index.
bevölkerungs_gdf = pd.concat([gpd.read_file(path) for path in file_paths], ignore_index=True)
bevölkerungs_gdf.to_file('./WLP/pharmacy_assigned_complete.gpkg', driver='GPKG')