# Municipality DataFrame collation

In [54]:
# Imports
import pandas as pd
import numpy as np
import json
from scipy.optimize import minimize
from geopy.distance import geodesic
import concurrent.futures
from tqdm import tqdm

In [7]:
# Read data
# municipalities = pd.read_csv('../data/raw/municipality_data.csv', encoding='utf-8', sep=';')
hospitals = pd.read_csv('../data/processed/hospitals_collated_reduced.csv')

# GeoJSON is UTF-8 by specification, so the encoding is passed explicitly:
# otherwise open() falls back to the platform default and non-ASCII
# municipality names may be decoded incorrectly on some systems.
with open('../data/raw/gemeinden_wus.geojson', 'r', encoding='utf-8') as f:
    geojson = json.load(f)

# Determine geographic center of municipalities
We want to assign a hospital to each municipality by choosing the geographically closest one. To do so, we first need a geographic center for each municipality, which we compute from the boundary coordinates in the GeoJSON file.

In [52]:

def geodesic_distance_sum(point, coords):
    """
    Total geodesic distance, in meters, between `point` and every entry of `coords`.

    `point` must unpack into exactly two values (lat, lon); each entry of
    `coords` is handed to `geodesic` as the second endpoint.
    """
    lat, lon = point
    origin = (lat, lon)

    total = 0
    for target in coords:
        total += geodesic(origin, target).meters
    return total

def geographic_median(coords):
    """
    Approximate the geographic median of a set of coordinates.

    The geographic median is the point that minimizes the *sum* of geodesic
    distances to all given coordinates (it is generally not equidistant to
    them). It is found numerically with Nelder-Mead, starting from the
    arithmetic mean of the coordinates.

    Parameters
    ----------
    coords : sequence of (lat, lon) pairs
        Coordinates to summarize; must contain at least one pair.

    Returns
    -------
    numpy.ndarray
        The optimizer's final point as [lat, lon]. The optimizer's
        convergence flag is not checked.

    Raises
    ------
    ValueError
        If `coords` is empty.
    """
    points = np.asarray(coords, dtype=float)
    if points.size == 0:
        # Without this guard the mean is NaN and the failure only surfaces
        # later as an obscure unpacking error inside the objective function.
        raise ValueError("geographic_median() requires at least one coordinate")

    # Start with the arithmetic mean as an initial guess
    initial_guess = points.mean(axis=0)

    # Minimize the sum of geodesic distances to all points
    result = minimize(geodesic_distance_sum, initial_guess, args=(coords,), method='Nelder-Mead')

    return result.x  # Optimized latitude and longitude

# Map each municipality name to the geographic median of its boundary vertices.
geocenter = dict()

for mun in geojson['features']:
    name = mun['properties']['gemeinde_NAME']
    geo_coords = mun['geometry']['coordinates']

    # NOTE(review): [0][0] selects the first ring of the first polygon, which
    # assumes a MultiPolygon-style nesting; any further polygons or rings are
    # ignored - TODO confirm this is intended.
    # Positions are stored as (longitude, latitude[, altitude]); swap them to
    # (latitude, longitude) and tolerate a missing or extra trailing component.
    coords = [(latitude, longitude) for longitude, latitude, *_ in geo_coords[0][0]]

    center = geographic_median(coords)
    geocenter[name] = center

#print(geojson['features'][0]['properties']['gemeinde_NAME'])
# print(geojson['features'][0]['geometry']['coordinates'])


KeyboardInterrupt: 

In [55]:
def geographic_median(coords):
    """
    Approximate the geographic median of a set of coordinates.

    The geographic median is the point that minimizes the *sum* of geodesic
    distances to all given coordinates (it is generally not equidistant to
    them). It is found numerically with Nelder-Mead, starting from the
    arithmetic mean of the coordinates.

    Parameters
    ----------
    coords : sequence of (lat, lon) pairs
        Coordinates to summarize; must contain at least one pair.

    Returns
    -------
    numpy.ndarray
        The optimizer's final point as [lat, lon]. The optimizer's
        convergence flag is not checked.

    Raises
    ------
    ValueError
        If `coords` is empty.
    """
    points = np.asarray(coords, dtype=float)
    if points.size == 0:
        # Without this guard the mean is NaN and the failure only surfaces
        # later as an obscure unpacking error inside the objective function.
        raise ValueError("geographic_median() requires at least one coordinate")

    # Start with the arithmetic mean as an initial guess
    initial_guess = points.mean(axis=0)

    # Minimize the sum of geodesic distances to all points
    result = minimize(geodesic_distance_sum, initial_guess, args=(coords,), method='Nelder-Mead')

    return result.x  # Optimized latitude and longitude

def process_municipality(mun):
    """
    Compute the geographic center of a single municipality feature.

    Parameters
    ----------
    mun : dict
        A feature from the GeoJSON file. Reads
        ``mun['properties']['gemeinde_NAME']`` and
        ``mun['geometry']['coordinates']``.

    Returns
    -------
    tuple
        ``(name, center)`` where ``center`` is the value returned by
        ``geographic_median`` for the feature's boundary vertices.
    """
    name = mun['properties']['gemeinde_NAME']
    geo_coords = mun['geometry']['coordinates']

    # NOTE(review): [0][0] selects the first ring of the first polygon, which
    # assumes a MultiPolygon-style nesting; any further polygons or rings are
    # ignored - TODO confirm this is intended.
    # Positions are stored as (longitude, latitude[, altitude]); swap them to
    # (latitude, longitude) and tolerate a missing or extra trailing component.
    coords = [(latitude, longitude) for longitude, latitude, *_ in geo_coords[0][0]]

    center = geographic_median(coords)
    return name, center

# Compute the center of every municipality concurrently, collecting results
# into `geocenter` (name -> center) as they complete.
geocenter = {}

num_municipalities = len(geojson['features'])

# NOTE(review): the captured output shows several seconds per municipality, so
# the work looks CPU-bound; threads may give little speed-up under the GIL -
# consider caching the result or using a process pool. TODO confirm.
# tqdm is used as a context manager so the bar is closed even when the cell is
# interrupted or an unexpected exception propagates.
with tqdm(total=num_municipalities, desc="Processing municipalities") as pbar:
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = {executor.submit(process_municipality, mun): mun for mun in geojson['features']}
        for future in concurrent.futures.as_completed(futures):
            mun = futures[future]
            try:
                name, center = future.result()
                geocenter[name] = center
            except Exception as e:
                # Best effort: report the failing municipality and keep going.
                print(f"Error processing municipality {mun['properties']['gemeinde_NAME']}: {e}")
            finally:
                # Advance exactly once per finished future, success or failure.
                pbar.update(1)

Processing municipalities:   0%|          | 0/2175 [00:00<?, ?it/s]

Processing municipalities:   2%|▏         | 33/2175 [02:44<2:07:23,  3.57s/it]