In [13]:
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point, Polygon
import os

from sklearn.neighbors import BallTree
import numpy as np


In [20]:
#define Functions for nearest neighbor analysis (Quelle: https://github.com/mario-gellrich-zhaw/spatial_data_analysis)
def get_nearest(src_points, candidates, k_neighbors=1):
    """Find nearest neighbors for all source points from a set of candidate points"""

    # Create tree from the candidate points
    tree = BallTree(candidates, leaf_size=15, metric='haversine')

    # Find closest points and distances
    distances, indices = tree.query(src_points, k=k_neighbors)

    # Transpose to get distances and indices into arrays
    distances = distances.transpose()
    indices = indices.transpose()

    # Get closest indices and distances (i.e. array at index 0)
    # note: for the second closest points, you would take index 1, etc.
    closest = indices[0]
    closest_dist = distances[0]

    # Return indices and distances
    return (closest, closest_dist)


def nearest_neighbor(left_gdf, right_gdf, return_dist=False):
    """
    For each point in left_gdf, find closest point in right GeoDataFrame and return them.

    NOTICE: Assumes that the input Points are in WGS84 projection (lat/lon).
    """

    left_geom_col = left_gdf.geometry.name
    right_geom_col = right_gdf.geometry.name

    # Ensure that index in right gdf is formed of sequential numbers
    right = right_gdf.copy().reset_index(drop=True)

    # Parse coordinates from points and insert them into a numpy array as RADIANS
    left_radians = np.array(left_gdf[left_geom_col].apply(lambda geom: (geom.x * np.pi / 180, geom.y * np.pi / 180)).to_list())
    right_radians = np.array(right[right_geom_col].apply(lambda geom: (geom.x * np.pi / 180, geom.y * np.pi / 180)).to_list())

    # Find the nearest points
    # -----------------------
    # closest ==> index in right_gdf that corresponds to the closest point
    # dist ==> distance between the nearest neighbors (in meters)

    closest, dist = get_nearest(src_points=left_radians, candidates=right_radians)

    # Return points from right GeoDataFrame that are closest to points in left GeoDataFrame
    closest_points = right.loc[closest]

    # Ensure that the index corresponds the one in left_gdf
    closest_points = closest_points.reset_index(drop=True)

    # Add distance if requested
    if return_dist:
        # Convert to meters from radians
        earth_radius = 6371000  # meters
        closest_points['distance'] = dist * earth_radius

    return closest_points

In [15]:
# import hospital data as geodataframe
gdf_spital = gpd.read_file(r'C:\Users\gigim\Documents\GitHub\mlp\geodata\\data_spital.gpkg')

In [6]:
# read csv file of addresses with long and lat and convert df to geodataframe

df_24 = pd.read_csv('immoscout24_geocoded.csv', delimiter='|')

# Create Point geometries from latitude and longitude
geometry = [Point(xy) for xy in zip(df_24['longitude'], df_24['latitude'])]

# Create a GeoDataFrame, using the DataFrame and the geometries
gdf_24 = gpd.GeoDataFrame(df_24, geometry=geometry)

# Now you can use functions similar to those you would use with .gpkg files
gdf_24.head()

In [34]:
# remove nan values in geodaraframe

print(gdf_24.shape)
# Filter out rows where 'geometry' is NaN
gdf_24.dropna(subset=['longitude'], inplace=True)

print(gdf_24.shape)

(637, 32)
(637, 32)


In [35]:
gdf_spital.columns

Index(['name', 'ort', 'plz', 'geometry'], dtype='object')

In [23]:
# Dropping columns in-place
gdf_spital.drop(columns=['adr_inter', 'adresse', 'adrzus_int', 'behindertenparkplatz',
       'bemerkung', 'ccmail', 'da', 'datum', 'datum_cms', 'dep', 'editor',
       'erforderlichedokumente', 'fax', 'hausnummer', 'hindernisfreiheit',
       'infrastruktur', 'isbetriebsferien_gebaeude',
       'isbetriebsferien_schalter', 'kategorie', 'mail', 'namenzus',
       'objectid', 'oeffnungszeiten_gebaeude_di',
       'oeffnungszeiten_gebaeude_do', 'oeffnungszeiten_gebaeude_fr',
       'oeffnungszeiten_gebaeude_mi', 'oeffnungszeiten_gebaeude_mo',
       'oeffnungszeiten_gebaeude_sa', 'oeffnungszeiten_gebaeude_so',
       'oeffnungszeiten_schalter_di', 'oeffnungszeiten_schalter_do',
       'oeffnungszeiten_schalter_fr', 'oeffnungszeiten_schalter_mi',
       'oeffnungszeiten_schalter_mo', 'oeffnungszeiten_schalter_sa',
       'oeffnungszeiten_schalter_so', 'poi_id', 'postadresse',
       'publish_internet', 'strasse', 'suchen', 'tel', 'tel2', 'www',
       'zahlungsmittel_internet', 'zahlungsmittel_schalter',
       'zahlungsmittel_telefon', 'zvv_label', 'zvv_link',], inplace=True)


KeyError: "['adr_inter', 'adresse', 'adrzus_int', 'behindertenparkplatz', 'bemerkung', 'ccmail', 'da', 'datum', 'datum_cms', 'dep', 'editor', 'erforderlichedokumente', 'fax', 'hausnummer', 'hindernisfreiheit', 'infrastruktur', 'isbetriebsferien_gebaeude', 'isbetriebsferien_schalter', 'kategorie', 'mail', 'namenzus', 'objectid', 'oeffnungszeiten_gebaeude_di', 'oeffnungszeiten_gebaeude_do', 'oeffnungszeiten_gebaeude_fr', 'oeffnungszeiten_gebaeude_mi', 'oeffnungszeiten_gebaeude_mo', 'oeffnungszeiten_gebaeude_sa', 'oeffnungszeiten_gebaeude_so', 'oeffnungszeiten_schalter_di', 'oeffnungszeiten_schalter_do', 'oeffnungszeiten_schalter_fr', 'oeffnungszeiten_schalter_mi', 'oeffnungszeiten_schalter_mo', 'oeffnungszeiten_schalter_sa', 'oeffnungszeiten_schalter_so', 'poi_id', 'postadresse', 'publish_internet', 'strasse', 'suchen', 'tel', 'tel2', 'www', 'zahlungsmittel_internet', 'zahlungsmittel_schalter', 'zahlungsmittel_telefon', 'zvv_label', 'zvv_link'] not found in axis"

In [36]:
### use nna functions to deterine the clostest point
# Closest supermarket of each apartment
closest_hospitals = nearest_neighbor(gdf_24, gdf_spital, return_dist=True)

print(len(closest_hospitals), '==', len(gdf_24))    

# Rename the geometry of closest stops gdf so that we can easily identify it
closest_hospitals = closest_hospitals.rename(columns={'geometry': 'closest_sup_geom'})
closest_hospitals.head()

637 == 637


Unnamed: 0,name,ort,plz,closest_sup_geom,distance
0,Klinik Innere Medizin,Zürich,8037,POINT (2681675.000 1250346.000),3495497.0
1,Klinik Innere Medizin,Zürich,8037,POINT (2681675.000 1250346.000),3499138.0
2,Klinik Innere Medizin,Zürich,8037,POINT (2681675.000 1250346.000),3497255.0
3,Klinik Innere Medizin,Zürich,8037,POINT (2681675.000 1250346.000),3506023.0
4,Klinik Innere Medizin,Zürich,8037,POINT (2681675.000 1250346.000),3506011.0


In [38]:
result = pd.merge(closest_hospitals, 
                  gdf_24, 
                  left_index=True, 
                  right_index=True)

In [41]:
result.to_clipboard()

In [53]:
## try to calculate distance using haversine (Quelle: https://towardsdatascience.com/calculating-distance-between-two-geolocations-in-python-26ad3afe287b)
        # pip install haversine
import haversine as hs

locc=(28.426846,77.088834)
loc2=(28.394231,77.050308)
print(hs.haversine(locc,loc2))
df_24['distance_hosp'] = None

for index, row in df_24.iterrows():
    loc1 = (row['longitude'], row['latitude'])
    min_distance = None
    for index2, row2 in gdf_spital.iterrows:
    df_24.at[index, 'distance_hosp'] = hs.haversine(loc1,loc2)

print(df_24['distance_hosp'])
    

5.229712941541709
0      3806.732885
1      3807.115953
2      3808.351987
3      3814.001734
4       3813.99566
          ...     
636    3809.553221
637    3809.553614
638    3809.554536
639    3809.510472
640    3809.548059
Name: distance_hosp, Length: 641, dtype: object


In [55]:
# First ensure that your geometries are points
assert all(gdf_spital.geometry.geom_type == 'Point'), "All geometries must be Points."

# Create longitude and latitude columns from the geometry
gdf_spital['longitude'] = gdf_spital.geometry.x
gdf_spital['latitude'] = gdf_spital.geometry.y

# Now convert your GeoDataFrame to a DataFrame
df_spital = pd.DataFrame(gdf_spital.drop(columns='geometry'))

In [56]:
df_spital

Unnamed: 0,name,ort,plz,longitude,latitude
0,Dialysestation Triemli,Zürich,8063,2679979.0,1246673.0
1,Klinik Innere Medizin,Zürich,8037,2681675.0,1250346.0
2,Onkologie und Hämatologie,Zürich,8037,2681675.0,1250346.0
3,Universitäre Klinik für Altersmedizin,Zürich,8037,2681675.0,1250346.0
4,Physiotherapie,Zürich,8037,2681675.0,1250346.0
...,...,...,...,...,...
137,Zentrum für Palliative Care,Zürich,8037,2681675.0,1250346.0
138,Prokto Zürich,Zürich,8063,2679979.0,1246673.0
139,Altersmedizinisches Ambulatorium,Zürich,8063,2679979.0,1246673.0
140,Institut für Notfallmedizin,Zürich,8063,2679979.0,1246673.0
