## Notebook to enrich location data with info about access to public transport

### Load the data

In [12]:
import pandas as pd

filepath = 'public_transport.csv'
relevant_columns = ['Name','Betriebspunkttyp_Bezeichnung','Verkehrsmittel_Bezeichnung','E','N']

df_transport = pd.read_csv(filepath, sep=",", encoding='latin1')[relevant_columns]
df_transport.head(1)

Unnamed: 0,Name,Betriebspunkttyp_Bezeichnung,Verkehrsmittel_Bezeichnung,E,N
0,"Disentis/Mustér, staziun/posta",Haltestelle,Bus,2708310,1173635


In [6]:
filepath = '../TablesDB/Location_v3_enriched_v4.csv'

df_locationfile = pd.read_csv(filepath)
df_locationfile.head(1)

Unnamed: 0,LocationId,Street,ZIP,longitude,latitude,#supermarkets,#foodandbeverage,BFS_NUMMER,NAME,Einkommenssteuer_Kanton,Einkommenssteuer_Gemeinde,Vermögenssteuer_Kanton,Vermögenssteuer_Gemeinde,Gemeindecode,population_density
0,78753131-6d78-4d1e-a0e5-fc8b5f430570,,6598.0,8.849665,46.175962,2,3,5131,Tenero-Contra,97.0,93.0,97.0,93.0,5131.0,873


### Convert LV95 coordinate system to WGS84 system

In [15]:
from pyproj import Proj, transform

lv95 = Proj(init='epsg:2056')  # LV95 projection
wgs84 = Proj(init='epsg:4326')  # WGS84 projection

# Convert LV95 coordinates to WGS84 (latitude and longitude)
# Convert LV95 coordinates to WGS84 (latitude and longitude)
df_transport['Longitude'], df_transport['Latitude'] = transform(lv95, wgs84, df_transport['E'].values, df_transport['N'].values)
df_transport.drop(['E', 'N'], axis=1, inplace=True)
df_transport


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  df_transport['Longitude'], df_transport['Latitude'] = transform(lv95, wgs84, df_transport['E'].values, df_transport['N'].values)


Unnamed: 0,Name,Betriebspunkttyp_Bezeichnung,Verkehrsmittel_Bezeichnung,Longitude,Latitude
0,"Disentis/Mustér, staziun/posta",Haltestelle,Bus,8.854999,46.705132
1,Brig,Haltestelle und Bedienpunkt,Zug,7.988091,46.319422
2,"Goldach, Bahnhof",Haltestelle,Bus,9.466060,47.473769
3,"Ecublens VD, Bassenges",Haltestelle,Metro,6.564591,46.524586
4,"St.Gallen, Mühlegg Bergstation",Haltestelle,Standseilbahn,9.379461,47.419528
...,...,...,...,...,...
28519,"Herisau, Bahnhof",Haltestelle,Bus,9.276727,47.389873
28520,"Niederhof, Feuerwehrdepot",Haltestelle,Bus,9.092973,47.523093
28521,Landschlacht,Haltestelle,Zug,9.246953,47.628896
28522,"Bern, Spital Sonnenhof",Haltestelle,Bus,7.473596,46.943126


### Filter out only relevant stops (for commercial use: "Haltestelle" & "Halltestelle und Bedienpunkt")

In [19]:
print(df_transport.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28524 entries, 0 to 28523
Data columns (total 5 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Name                          28524 non-null  object 
 1   Betriebspunkttyp_Bezeichnung  28524 non-null  object 
 2   Verkehrsmittel_Bezeichnung    26933 non-null  object 
 3   Longitude                     28524 non-null  float64
 4   Latitude                      28524 non-null  float64
dtypes: float64(2), object(3)
memory usage: 1.1+ MB
None


In [20]:
relevant_stops = ['Haltestelle', 'Haltestelle und Bedienpunkt']

df_transport = df_transport[df_transport['Betriebspunkttyp_Bezeichnung'].isin(relevant_stops)]

print(df_transport.info())


<class 'pandas.core.frame.DataFrame'>
Index: 26933 entries, 0 to 28523
Data columns (total 5 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Name                          26933 non-null  object 
 1   Betriebspunkttyp_Bezeichnung  26933 non-null  object 
 2   Verkehrsmittel_Bezeichnung    26933 non-null  object 
 3   Longitude                     26933 non-null  float64
 4   Latitude                      26933 non-null  float64
dtypes: float64(2), object(3)
memory usage: 1.2+ MB
None


### Count the number of public transport stops in a certain radius of each apartment

In [21]:
#only run once, compute intensive
import pandas as pd
import haversine as hs
from haversine import Unit

apartments_df = df_locationfile

def count_nearby_poi(apartment, poi, radius):
    count = 0
    for index, point in poi.iterrows():
        # Calculate distance between apartment and point of interest
        distance = hs.haversine((apartment['latitude'], apartment['longitude']), 
                                (point['Latitude'], point['Longitude']), 
                                unit=Unit.METERS)
        if distance <= radius:
            count += 1
    return count


# Define the radius for nearby points of interest
radius = 500  # 500 meters

# Create an empty list to store the count of nearby points of interest for each apartment
poi_counts_stops = []


# Iterate through each apartment
for index, apartment in apartments_df.iterrows():
    # Count nearby points of interest for the current apartment
    count_stops = count_nearby_poi(apartment, df_transport, radius)
    poi_counts_stops.append(count_stops)
   

# Add the list of counts to the apartments_df DataFrame as a new column
apartments_df['#public_transport_stops'] = poi_counts_stops

# Show the DataFrame with the count of nearby points of interest for each apartment
apartments_df

Unnamed: 0,LocationId,Street,ZIP,longitude,latitude,#supermarkets,#foodandbeverage,BFS_NUMMER,NAME,Einkommenssteuer_Kanton,Einkommenssteuer_Gemeinde,Vermögenssteuer_Kanton,Vermögenssteuer_Gemeinde,Gemeindecode,population_density,#public_transport_stops
0,78753131-6d78-4d1e-a0e5-fc8b5f430570,,6598.0,8.849665,46.175962,2,3,5131,Tenero-Contra,97.0,93.0,97.0,93.0,5131.0,873,8
1,f0c2bc0e-55ab-4eb1-98fa-edcb4a0ee01e,via albonago 43,6962.0,8.974113,46.013360,0,3,5192,Lugano,97.0,77.0,97.0,77.0,5192.0,825,10
2,fbdcde66-0fd7-4304-a83e-70ecbb0f6ae7,Via San Gottardo 41,6500.0,9.032918,46.201556,0,1,5002,Bellinzona,97.0,93.0,97.0,93.0,5002.0,264,6
3,2d7451a3-d7e1-4dc7-babf-ab237f2c02e4,Via F. Zorzi 17,6500.0,9.015443,46.187900,0,6,5002,Bellinzona,97.0,93.0,97.0,93.0,5002.0,264,10
4,2bde1434-f480-4d46-9b05-99e0c0659671,Via San Gottardo,6900.0,8.941940,46.015976,3,3,5221,Savosa,97.0,75.0,97.0,75.0,5221.0,2'972,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11341,2dba2402-d23f-4ab8-9bb6-702bbe28cb4e,Chemin de la Petite-Californie,1222.0,6.207831,46.242178,0,0,6616,Collonge-Bellerive,147.5,29.0,147.5,29.0,6616.0,1'356,3
11342,daed4779-995c-4c34-85e7-4b269d8bf5cc,Chemin William-Barbey,1292.0,6.149006,46.241745,0,0,6634,Pregny-ChambÃ©sy,147.5,32.0,147.5,32.0,6634.0,1'165,4
11343,faddd38c-e9fc-4f4d-bf09-a41818cc620f,Rue du Temple 5,1236.0,6.018499,46.174626,0,1,6609,Cartigny,147.5,42.0,147.5,42.0,6609.0,225,2
11344,84d332ee-e1a8-4b5f-a432-82819fffe330,Chemin de la Pierre-à-Bochet,1226.0,6.214858,46.199271,0,0,6640,ThÃ´nex,147.5,44.0,147.5,44.0,6640.0,3'713,4


In [22]:
#only run once otherwise you overwrite the existing data
#filepath = '../TablesDB/Location_v3_enriched_v5.csv'
#apartments_df.to_csv(filepath, index=False)

### Extract the public transport stops for one single apartment

In [28]:
import pandas as pd
import haversine as hs
from haversine import Unit

def find_nearby_poi(location_lat, location_lon, poi, radius):
    nearby_poi = pd.DataFrame(columns=poi.columns)  # Initialize an empty DataFrame to store nearby points of interest
    for index, point in poi.iterrows():
        # Calculate distance between location and point of interest
        distance = hs.haversine((location_lat, location_lon), 
                                (point['Latitude'], point['Longitude']), 
                                unit=Unit.METERS)
        if distance <= radius:
            # Add the point of interest to the nearby_poi DataFrame
            nearby_poi.loc[len(nearby_poi)] = point
    return nearby_poi

def count_nearby_poi(location_lat, location_lon, poi, radius):
    nearby_poi = find_nearby_poi(location_lat, location_lon, poi, radius)
    count = len(nearby_poi)
    return count, nearby_poi

# Assuming location_lat and location_lon are the latitude and longitude of the location
# and supermarket_df is your DataFrame containing points of interest

# Define the radius for nearby points of interest
radius = 500  # 500 meters

apartment = [47.372881725214576, 8.49887991367827]

# Count nearby points of interest for the location
poi_count, nearby_poi_df = count_nearby_poi(apartment[0], apartment[1], df_transport, radius)

# Print the count of nearby points of interest
print("Number of nearby points of interest:", poi_count)

# Print the DataFrame containing nearby points of interest
print("Nearby points of interest:")
nearby_poi_df


Number of nearby points of interest: 5
Nearby points of interest:


Unnamed: 0,Name,Betriebspunkttyp_Bezeichnung,Verkehrsmittel_Bezeichnung,Longitude,Latitude
0,"Zürich, Schaufelbergerstrasse",Haltestelle,Bus / Tram,8.501841,47.368903
1,"Zürich, Langgrütstrasse",Haltestelle,Bus,8.501064,47.37374
2,"Zürich, Schulhaus Altweg",Haltestelle,Bus,8.494202,47.372274
3,"Zürich, Im Gut",Haltestelle,Bus,8.503365,47.373611
4,"Zürich, Sackzelg",Haltestelle,Bus,8.495515,47.374385
