In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('funda_rawdataset_20240517.csv')

In [3]:
df = df.drop_duplicates()
df = df.drop(columns={'size', 'city', 'last_ask_price', 'insulation', 'heating', 'num_of_bathrooms'})
df = df.rename(columns={'ownership':'outside_space', 'building_type':'newbuild'})

# clean the price data
df['price'] = df['price'].str.replace('€ ', '')
df['price'] = df['price'].str.replace(' /mnd', '')
df['price'] = df['price'].str.replace('k', '')
df['price'] = df['price'].str.replace('na', '0')
df['price'] = df['price'].str.replace('Huurprijs op aanvraag', '0')
df['price'] = df['price'].str.replace('.', '')

# dropping all listings with price = 0 as it can't be correct.
# drop the listings with price of more than 12500. Those listings are properties that are for sale and not for rent. They have been wrongly categorized.
# drop the listings with price < 500 as these are storage rooms or parking garages
df = df.drop(df[df['price'] == '0'].index)
df = df.drop(df[pd.to_numeric(df['price']) > 12500].index)
df = df.drop(df[pd.to_numeric(df['price']) < 500].index)


# clean zip_code into 'XXXX XX'
df['zip_code'] = df['zip_code'].str[:7]
df['zip_code'] = df['zip_code'].str.replace(' ','')


# drop garage listings
df = df.drop(df[df['kind_of_house'] == 'Garage'].index)
df = df.drop(df[df['kind_of_house'] == 'Inpandige garage'].index)
df = df.drop(df[df['kind_of_house'] == 'parkeerkelder'].index)
df = df.drop(df[df['kind_of_house'] == 'Parkeerplaats'].index)

# clean the year. We changed 'Voor XXXX', 'Na XXXX' and 'XXXX-YYYY' to 'XXXX'
df['year'] = df['year'].str.replace('Voor ', '')
df['year'] = df['year'].str.replace('Na ', '')
df['year'] = df['year'].str[:4]

# impute missing data with NaN
def check_year_validity(label):
    if 0 < label < 2025:
        return label
    else:
        return np.nan

df['year'] = df['year'].astype(int).apply(check_year_validity)

def check_living_area_validity(label):
    if label != 'na':
        return label
    else:
        return np.nan

# clean living_area data
df['living_area'] = df['living_area'].str.replace(' m²', '')
df['living_area'] = df['living_area'].astype(str).apply(check_living_area_validity)

df.describe()

Unnamed: 0,year
count,887.0
mean,1959.910936
std,91.418085
min,1005.0
25%,1906.0
50%,1998.0
75%,2024.0
max,2024.0


In [4]:
# extract number of rooms in total
df['num_of_rooms'] = df['layout'].str.extract(r'Aantal kamers(\d+)')
# extract the number of bathrooms
df['num_of_bedrooms'] = df['layout'].str.extract(r'(\d+) slaapkamer')
# extract the number of bedrooms
df['num_of_bathrooms'] = df['layout'].str.extract(r'(\d+) badkamer')
# extract the number of seperate toilets
df['num_of_separate_toilets'] = df['layout'].str.extract(r'(\d+) apart')
# extract the floor number of house
df['floor_of_house'] = df['layout'].str.extract(r'Gelegen op(\w+)')
df['floor_of_house'] = df['floor_of_house'].str.replace('e', '')

df = df.drop(columns={'layout'})

# dummy coding the parking variable. 1 is parkinggarage is available. 0 is parkinggarage is not available
df['parking'] = df['parking'].apply(lambda x: 1 if 'Soort parkeergelegenheid' in x else 0)

# dummy code newbuild variable. 1 if property is newly built. 0 is it is an existing property
df['newbuild'] = df['newbuild'].apply(lambda x: 1 if 'Nieuwbouw' in x else 0)

#change year to age of building
df['year'] = df['year'].astype(int)
df['year'] = 2024 - df['year']
df = df.rename(columns={'year':'age'})

# Turn the energy labels into ordinal values with NaN if value is missing
def check_energy_label(label):
    if 'A++++' in label:
        return 8
    elif 'A+++' in label:
        return 7
    elif 'A++' in label:
        return 6
    elif 'A+' in label:
        return 5
    elif 'A' in label:
        return 4
    elif 'B' in label:
        return 3
    elif 'C' in label:
        return 2
    elif 'D' in label:
        return 1
    else:
        return np.nan

df['energy_label'] = df['energy_label'].astype(str).apply(check_energy_label)

# reduce the amount of categories of the houses into more general categories
def categorize_housing_type(housing_type):
    apartments_flats = [
        'Portiekflat', 'Portiekflat (appartement met open portiek)', 'Galerijflat',
        'Galerijflat (appartement)', 'Portiekflat (appartement)', 'Tussenverdieping',
        'Bovenwoning (appartement)', 'Portiekwoning (appartement)', 'Benedenwoning (appartement)',
        'Maisonnette', 'Tussenverdieping (appartement)', 'Bovenwoning (appartement met open portiek)',
        'Benedenwoning (souterrain)', 'Benedenwoning (souterrain met open portiek)', 'Dubbel benedenhuis',
        'Maisonnette (appartement)', 'Dubbel benedenhuis (appartement)', 'Bovenwoning (dubbel bovenhuis)',
        'Benedenwoning (appartement met open portiek)', 'Tussenverdieping (appartement met open portiek)',
        'Bovenwoning (open portiek)', 'Beneden + bovenwoning (appartement met open portiek)',
        'Tussenverdieping (open portiek)', 'Beneden + bovenwoning (bel-etage)', 'Benedenwoning (bel-etage)',
        'Penthouse (appartement)', 'Dubbel benedenhuis (bel-etage)', 'Dubbel benedenhuis (appartement met open portiek)',
        'Tussenverdieping (service flat)'
    ]
    single_family_homes = [
        'Eengezinswoning, tussenwoning', 'Eengezinswoning, tussenwoning (hofjeswoning)', 
        'Eengezinswoning, geschakelde woning', 'Eengezinswoning, hoekwoning', 
        'Eengezinswoning, tussenwoning (split-level woning)', 'Eengezinswoning, tussenwoning (drive-in woning)', 
        'Eengezinswoning, 2-onder-1-kapwoning'
    ]
    duplex_multi_level_homes = [
        'Beneden + bovenwoning', 'Benedenwoning (dubbel bovenhuis)', 'Maisonnette (dubbel bovenhuis)', 
        'Dubbel benedenhuis (dubbel bovenhuis met open portiek)', 'Bovenwoning (dubbel bovenhuis met open portiek)', 
        'Beneden + bovenwoning (appartement)'
    ]
    luxury_homes = [
        'Penthouse', 'Herenhuis, tussenwoning', 'Villa, halfvrijstaande woning (waterwoning)', 
        'Landhuis, 2-onder-1-kapwoning', 'Grachtenpand, tussenwoning'
    ]
    waterfront_homes = ['Woonboot, vrijstaande woning (waterwoning)']

    if housing_type in apartments_flats:
        return 'apartments/flats'
    elif housing_type in single_family_homes:
        return 'single-family homes'
    elif housing_type in duplex_multi_level_homes:
        return 'duplex/multi-level homes'
    elif housing_type in luxury_homes:
        return 'luxury homes'
    elif housing_type in waterfront_homes:
        return 'waterfront homes'
    else:
        return np.nan

df['kind_of_house'] = df['kind_of_house'].apply(categorize_housing_type)

# dummy code if garden is present
def categorize_garden(x):
    if 'tuin' in x or 'Tuin' in x:
        return 1
    elif 'meter' in x or 'Meter' in x:
        return 1
    elif 'Patio' in x or 'patio' in x:
        return 1
    elif 'zonneterras' in x or 'Zonneterras' in x:
        return 1
    else:
        return 0

df['garden'] = df['outside_space'].apply(categorize_garden)

# dummy code balcony is balcony is available in property
def categorize_balcony(x):
    if 'Balkon' in x:
        return 1
    elif 'balkon' in x:
        return 1
    else:
        return 0

df['balcony'] = df['outside_space'].apply(categorize_balcony)

# dummy code rooftop is a rooftop is available for the tenants
def categorize_rooftop(x):
    if 'Dakterras' in x:
        return 1
    else:
        return 0

df['roof_top'] = df['outside_space'].apply(categorize_rooftop)

In [5]:
# dummy code the kind_of_house variable and add it to the dataframe
dummies = pd.get_dummies(df['kind_of_house'], prefix='type_of_home', dtype='int')
df = pd.concat([df, dummies], axis=1)

# drop the original 'kind_of_house' column as it's no longer needed
df.drop('kind_of_house', axis=1, inplace=True)

In [6]:
# for some variables, parking was present but not correctly inputted in funda, so we corrected it here
df['parking'] = df.apply(lambda row: 1 if '1 auto' in row['outside_space'] else row['parking'], axis=1)
df = df.drop(columns={'outside_space'})

# we replace blank strings where data is missing with the NaN
df[['num_of_rooms', 'energy_label', 'num_of_bedrooms', 'num_of_bathrooms', 'num_of_separate_toilets', 'floor_of_house']] = df[['num_of_rooms', 'energy_label', 'num_of_bedrooms', 'num_of_bathrooms', 'num_of_separate_toilets', 'floor_of_house']].replace('', np.nan)

# the floor_of_house are all digits, except for the ground floor. Because we did some data cleaning before, ground floor is now represented by Bgan, so we replaced Bgan to 0 as it is the 0th floor.
df['floor_of_house'] = df['floor_of_house'].replace('Bgan', 0)
df.head(20)

Unnamed: 0,url,price,address,descrip,zip_code,age,living_area,newbuild,num_of_rooms,energy_label,...,num_of_separate_toilets,floor_of_house,garden,balcony,roof_top,type_of_home_apartments/flats,type_of_home_duplex/multi-level homes,type_of_home_luxury homes,type_of_home_single-family homes,type_of_home_waterfront homes
0,https://www.funda.nl/huur/amsterdam/appartemen...,1045,Schipluidenlaan 254,Podium * VERHUUR GESTART * 147 middenhuur ...,1062HE,0,44,1,2,,...,,1.0,0,0,0,1,0,0,0,0
1,https://www.funda.nl/huur/amsterdam/appartemen...,1068,Krijn Taconiskade 299,*** See English version below *** Huren D...,1087HW,2,44,1,1,6.0,...,,3.0,0,0,0,1,0,0,0,0
2,https://www.funda.nl/huur/amsterdam/appartemen...,1099,Haarlemmerweg,** BINNENKORT IN VERHUUR | WesterparkWest f...,1014BL,0,50,1,2,,...,,,0,0,0,1,0,0,0,0
3,https://www.funda.nl/huur/amsterdam/appartemen...,1103,Krijn Taconiskade 441,*** See English version below *** Huren D...,1087HW,2,44,1,1,6.0,...,,5.0,0,0,0,1,0,0,0,0
4,https://www.funda.nl/huur/amsterdam/appartemen...,1105,Willem Frogerstraat 41,Podium *VERHUUR GESTART* 147 middenhuur app...,1062HZ,0,56,1,2,,...,,1.0,0,0,0,1,0,0,0,0
5,https://www.funda.nl/huur/amsterdam/appartemen...,1115,Schipluidenlaan 270,Podium * VERHUUR GESTART * 147 middenhuur ...,1062HE,0,50,1,2,,...,,1.0,0,0,0,1,0,0,0,0
6,https://www.funda.nl/huur/amsterdam/appartemen...,1120,Staalmeesterslaan 377,"Via de website van Makelaardij Hoekstra, hu...",1057PG,53,58,0,2,5.0,...,,13.0,0,0,0,1,0,0,0,0
7,https://www.funda.nl/huur/amsterdam/appartemen...,1120,Staalmeesterslaan 380,"Via de website van Makelaardij Hoekstra, hu...",1057PG,53,58,0,2,5.0,...,,14.0,0,0,0,1,0,0,0,0
8,https://www.funda.nl/huur/amsterdam/appartemen...,1130,Olga de Haasstraat 513,Living the high life! *VERHUUR GESTART* Sch...,1095PG,0,58,1,2,,...,,28.0,0,0,0,1,0,0,0,0
9,https://www.funda.nl/huur/amsterdam/appartemen...,1135,Willem Frogerstraat 53,Podium * VERHUUR GESTART * 147 middenhuur ...,1062HZ,0,80,1,4,,...,,1.0,0,0,0,1,0,0,0,0


In [7]:
#change variable names into something more readable

df = df.rename(columns={'price':'rent','address':'street', 'descrip':'desc', 'zip_code':'pc6', 'living_area':'sqmtr', 'num_of_rooms':'rooms', 'energy_label':'elabel','num_of_bedrooms':'bedrooms', 'num_of_bathrooms':'bathrooms', 'num_of_separate_toilets':'toilets', 'floor_of_house':'floor', 'roof_top':'rooftop', 'type_of_home_apartments/flats':'apartment', 'type_of_home_duplex/multi-level homes':'duplexmulti', 'type_of_home_luxury homes':'luxury', 'type_of_home_single-family homes':'singlefam', 'type_of_home_waterfront homes':'houseboat'})

In [8]:
df.columns

Index(['url', 'rent', 'street', 'desc', 'pc6', 'age', 'sqmtr', 'newbuild',
       'rooms', 'elabel', 'parking', 'bedrooms', 'bathrooms', 'toilets',
       'floor', 'garden', 'balcony', 'rooftop', 'apartment', 'duplexmulti',
       'luxury', 'singlefam', 'houseboat'],
      dtype='object')

In [9]:
#importing the cleaned dataset and making a separate column for the pc4 to be able to merge the pc4 dataset with the funda dataset
df['pc4'] = df['pc6'].str[:4]
df['pc4'] = df['pc4'].astype(int)

#import the pc4 dataset and rename the column to have a common column name between the datasets
df_cbs = pd.read_csv('cbs_rawdataset_pc4.csv')
df_cbs = df_cbs.rename(columns={'Postcode-4':'pc4'})

#merge the two datasets
df_merged = pd.merge(df, df_cbs, on='pc4')
df_merged.head(20)

Unnamed: 0,url,rent,street,desc,pc6,age,sqmtr,newbuild,rooms,elabel,...,houseboat,pc4,Inwoners Totaal (x1),Geboren in Nederland met een Nederlandse herkomst (%),Huishouden Totaal (x1),Huishoudgrootte (x1),Woning Totaal (x1),Eigendom Koopwoning (%),WOZ-waarde\nwoning (x 1 000 Euro),Omgevingsadressendichtheid (adressen/km2)
0,https://www.funda.nl/huur/amsterdam/appartemen...,1045,Schipluidenlaan 254,Podium * VERHUUR GESTART * 147 middenhuur ...,1062HE,0,44,1,2,,...,0,1062,14250,30,8805,1.6,8275,10,293,5646
1,https://www.funda.nl/huur/amsterdam/appartemen...,1068,Krijn Taconiskade 299,*** See English version below *** Huren D...,1087HW,2,44,1,1,6.0,...,0,1087,19860,40,8040,2.5,7840,50,536,1805
2,https://www.funda.nl/huur/amsterdam/appartemen...,1099,Haarlemmerweg,** BINNENKORT IN VERHUUR | WesterparkWest f...,1014BL,0,50,1,2,,...,0,1014,3660,60,1705,2.1,1745,50,731,3309
3,https://www.funda.nl/huur/amsterdam/appartemen...,1103,Krijn Taconiskade 441,*** See English version below *** Huren D...,1087HW,2,44,1,1,6.0,...,0,1087,19860,40,8040,2.5,7840,50,536,1805
4,https://www.funda.nl/huur/amsterdam/appartemen...,1105,Willem Frogerstraat 41,Podium *VERHUUR GESTART* 147 middenhuur app...,1062HZ,0,56,1,2,,...,0,1062,14250,30,8805,1.6,8275,10,293,5646
5,https://www.funda.nl/huur/amsterdam/appartemen...,1115,Schipluidenlaan 270,Podium * VERHUUR GESTART * 147 middenhuur ...,1062HE,0,50,1,2,,...,0,1062,14250,30,8805,1.6,8275,10,293,5646
6,https://www.funda.nl/huur/amsterdam/appartemen...,1120,Staalmeesterslaan 377,"Via de website van Makelaardij Hoekstra, hu...",1057PG,53,58,0,2,5.0,...,0,1057,17345,40,10005,1.7,9165,20,397,10790
7,https://www.funda.nl/huur/amsterdam/appartemen...,1120,Staalmeesterslaan 380,"Via de website van Makelaardij Hoekstra, hu...",1057PG,53,58,0,2,5.0,...,0,1057,17345,40,10005,1.7,9165,20,397,10790
8,https://www.funda.nl/huur/amsterdam/appartemen...,1130,Olga de Haasstraat 513,Living the high life! *VERHUUR GESTART* Sch...,1095PG,0,58,1,2,,...,0,1095,15010,40,8375,1.8,8155,20,375,4572
9,https://www.funda.nl/huur/amsterdam/appartemen...,1135,Willem Frogerstraat 53,Podium * VERHUUR GESTART * 147 middenhuur ...,1062HZ,0,80,1,4,,...,0,1062,14250,30,8805,1.6,8275,10,293,5646


In [10]:
df_merged = pd.merge(df, df_cbs, on='pc4')
df_merged.head(20)

Unnamed: 0,url,rent,street,desc,pc6,age,sqmtr,newbuild,rooms,elabel,...,houseboat,pc4,Inwoners Totaal (x1),Geboren in Nederland met een Nederlandse herkomst (%),Huishouden Totaal (x1),Huishoudgrootte (x1),Woning Totaal (x1),Eigendom Koopwoning (%),WOZ-waarde\nwoning (x 1 000 Euro),Omgevingsadressendichtheid (adressen/km2)
0,https://www.funda.nl/huur/amsterdam/appartemen...,1045,Schipluidenlaan 254,Podium * VERHUUR GESTART * 147 middenhuur ...,1062HE,0,44,1,2,,...,0,1062,14250,30,8805,1.6,8275,10,293,5646
1,https://www.funda.nl/huur/amsterdam/appartemen...,1068,Krijn Taconiskade 299,*** See English version below *** Huren D...,1087HW,2,44,1,1,6.0,...,0,1087,19860,40,8040,2.5,7840,50,536,1805
2,https://www.funda.nl/huur/amsterdam/appartemen...,1099,Haarlemmerweg,** BINNENKORT IN VERHUUR | WesterparkWest f...,1014BL,0,50,1,2,,...,0,1014,3660,60,1705,2.1,1745,50,731,3309
3,https://www.funda.nl/huur/amsterdam/appartemen...,1103,Krijn Taconiskade 441,*** See English version below *** Huren D...,1087HW,2,44,1,1,6.0,...,0,1087,19860,40,8040,2.5,7840,50,536,1805
4,https://www.funda.nl/huur/amsterdam/appartemen...,1105,Willem Frogerstraat 41,Podium *VERHUUR GESTART* 147 middenhuur app...,1062HZ,0,56,1,2,,...,0,1062,14250,30,8805,1.6,8275,10,293,5646
5,https://www.funda.nl/huur/amsterdam/appartemen...,1115,Schipluidenlaan 270,Podium * VERHUUR GESTART * 147 middenhuur ...,1062HE,0,50,1,2,,...,0,1062,14250,30,8805,1.6,8275,10,293,5646
6,https://www.funda.nl/huur/amsterdam/appartemen...,1120,Staalmeesterslaan 377,"Via de website van Makelaardij Hoekstra, hu...",1057PG,53,58,0,2,5.0,...,0,1057,17345,40,10005,1.7,9165,20,397,10790
7,https://www.funda.nl/huur/amsterdam/appartemen...,1120,Staalmeesterslaan 380,"Via de website van Makelaardij Hoekstra, hu...",1057PG,53,58,0,2,5.0,...,0,1057,17345,40,10005,1.7,9165,20,397,10790
8,https://www.funda.nl/huur/amsterdam/appartemen...,1130,Olga de Haasstraat 513,Living the high life! *VERHUUR GESTART* Sch...,1095PG,0,58,1,2,,...,0,1095,15010,40,8375,1.8,8155,20,375,4572
9,https://www.funda.nl/huur/amsterdam/appartemen...,1135,Willem Frogerstraat 53,Podium * VERHUUR GESTART * 147 middenhuur ...,1062HZ,0,80,1,4,,...,0,1062,14250,30,8805,1.6,8275,10,293,5646


In [12]:
df = df_merged
df.head()

Unnamed: 0,url,rent,street,desc,pc6,age,sqmtr,newbuild,rooms,elabel,...,houseboat,pc4,Inwoners Totaal (x1),Geboren in Nederland met een Nederlandse herkomst (%),Huishouden Totaal (x1),Huishoudgrootte (x1),Woning Totaal (x1),Eigendom Koopwoning (%),WOZ-waarde\nwoning (x 1 000 Euro),Omgevingsadressendichtheid (adressen/km2)
0,https://www.funda.nl/huur/amsterdam/appartemen...,1045,Schipluidenlaan 254,Podium * VERHUUR GESTART * 147 middenhuur ...,1062HE,0,44,1,2,,...,0,1062,14250,30,8805,1.6,8275,10,293,5646
1,https://www.funda.nl/huur/amsterdam/appartemen...,1068,Krijn Taconiskade 299,*** See English version below *** Huren D...,1087HW,2,44,1,1,6.0,...,0,1087,19860,40,8040,2.5,7840,50,536,1805
2,https://www.funda.nl/huur/amsterdam/appartemen...,1099,Haarlemmerweg,** BINNENKORT IN VERHUUR | WesterparkWest f...,1014BL,0,50,1,2,,...,0,1014,3660,60,1705,2.1,1745,50,731,3309
3,https://www.funda.nl/huur/amsterdam/appartemen...,1103,Krijn Taconiskade 441,*** See English version below *** Huren D...,1087HW,2,44,1,1,6.0,...,0,1087,19860,40,8040,2.5,7840,50,536,1805
4,https://www.funda.nl/huur/amsterdam/appartemen...,1105,Willem Frogerstraat 41,Podium *VERHUUR GESTART* 147 middenhuur app...,1062HZ,0,56,1,2,,...,0,1062,14250,30,8805,1.6,8275,10,293,5646


In [1]:
#I'll extract the coordinates of the addresses

import googlemaps
import json

df['full address'] = df['street'] + " " + df['pc6']

gmaps = googlemaps.Client(key='AIzaSyBvJFN53P1aS2hqnTh46liJnWAfACuIG-c')

def get_lat_long(address):
    try:
        geocode_result = gmaps.geocode(address)
        location = geocode_result[0]['geometry']['location']
        return location['lat'], location['lng']
    except Exception as e:
        return None, None

df['latitude'], df['longitude'] = zip(*df['full address'].apply(get_lat_long))

NameError: name 'df' is not defined

In [14]:
df.to_csv('rentprediction_dataset_v1.csv', index=False)

The above uses the Google API and takes time to load. All the data processing in the above does not have to be done everytime. You can just import the 'rentprediction_dataset_v1.csv' document.

In [49]:
import pandas as pd
import numpy as np

df = pd.read_csv('rentprediction_dataset_v1.csv')
df = df.drop(columns={'Inwoners Totaal (x1)', 'Huishouden Totaal (x1)', 'Woning Totaal (x1)'})
df = df.rename(columns={'Geboren in Nederland met een Nederlandse herkomst (%)':'percnative_pc4', 'Huishoudgrootte (x1)':'sizehoudehold_pc4', 'Eigendom Koopwoning (%)':'percpropertyown_pc4',"WOZ-waarde\nwoning (x 1 000 Euro)":'taxablepropertyvaluation', 'Omgevingsadressendichtheid (adressen/km2)':'populationdensity_pc4'})
df['percnative_pc4'] = df['percnative_pc4'].apply(lambda x: x*0.01)
df['percpropertyown_pc4'] = df['percpropertyown_pc4'].apply(lambda x: x*0.01)
df['taxablepropertyvaluation'] = df['taxablepropertyvaluation'].apply(lambda x: x*1000)
df = df.dropna(subset=['latitude'])
df.to_csv('rentprediction_dataset_v2.csv', index=False)

In [51]:
def calculate_distance(lat1, lon1, lat2, lon2):
    return np.sqrt((lat1 - lat2)**2 + (lon1 - lon2)**2)

parks_df = pd.read_csv('amsterdamparks.csv')

for park_index, park_row in parks_df.iterrows():
    park_name = park_row['Park Name']
    park_lat = park_row['Latitude']
    park_long = park_row['Longitude']
    
    # Calculate distance for each rental property to the current park
    df[f'eucl_dist_to_{park_name}'] = df.apply(
        lambda row: calculate_distance(row['latitude'], row['longitude'], park_lat, park_long), axis=1
    )

In [58]:
def calculate_distance(lat1, lon1, lat2, lon2):
    return np.sqrt((lat1 - lat2)**2 + (lon1 - lon2)**2)

tramsubway_df = pd.read_csv('tramsubwaystopamsterdam.csv', sep=';')

for index, tramsub_row in tramsubway_df.iterrows():
    tramsub_name = tramsub_row['Naam']
    tramsub_lat = tramsub_row['LAT']
    tramsub_long = tramsub_row['LNG']
    
    # Calculate distance for each rental property to the current park
    df[f'eucl_dist_to_{tramsub_name}'] = df.apply(lambda row: calculate_distance(row['latitude'], row['longitude'], tramsub_lat, tramsub_long), axis=1)

  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_{tramsub_name}'] = df.apply(
  df[f'eucl_dist_to_

In [61]:
import pandas as pd
from geopy.geocoders import Nominatim
import time

# Initialize geolocator
geolocator = Nominatim(user_agent="amsterdam_stations")

# List of train stations in Amsterdam
stations = [
    "Station Amsterdam Muiderpoort",
    "Station Amsterdam Amstel",
    "Station Amsterdam Science Park",
    "Station Amsterdam Sloterdijk",
    "Station Amsterdam Lelylaan",
    "Station RAI Amsterdam",
    "Station Amsterdam Zuid/WTC",
    "Station Duivendrecht",
    "Station Amsterdam Bijlmer",
    "Station Amsterdam Holendrecht",
    "Station Weesp",
    "Station Amsterdam Centraal"
]

# Function to get coordinates
def get_coordinates(station_name):
    location = geolocator.geocode(f"{station_name}, Amsterdam, Netherlands")
    if location:
        return location.latitude, location.longitude
    else:
        return None, None

# Get coordinates for each station and store in a list
data = []
for station in stations:
    time.sleep(1)  # To respect Nominatim's usage policy
    lat, lon = get_coordinates(station)
    data.append({"Station": station, "Latitude": lat, "Longitude": lon})

# Create DataFrame
trainstation_df = pd.DataFrame(data)

# Drop rows where coordinates are None
trainstation_df = trainstation_df.dropna(subset=['Latitude', 'Longitude'])

                                                   url  rent  \
0    https://www.funda.nl/huur/amsterdam/appartemen...  1045   
1    https://www.funda.nl/huur/amsterdam/appartemen...  1068   
2    https://www.funda.nl/huur/amsterdam/appartemen...  1099   
3    https://www.funda.nl/huur/amsterdam/appartemen...  1103   
4    https://www.funda.nl/huur/amsterdam/appartemen...  1105   
..                                                 ...   ...   
882  https://www.funda.nl/huur/amsterdam/appartemen...  8900   
883  https://www.funda.nl/huur/amsterdam/appartemen...  9000   
884  https://www.funda.nl/huur/amsterdam/huis-43572...  9500   
885  https://www.funda.nl/huur/amsterdam/appartemen...  9500   
886  https://www.funda.nl/huur/amsterdam/appartemen...  9500   

                                  street  \
0                    Schipluidenlaan 254   
1                  Krijn Taconiskade 299   
2                          Haarlemmerweg   
3                  Krijn Taconiskade 441   
4          

In [63]:
trainstation_df.to_csv('trainstations.csv')