In [5]:
# import libraries
import numpy as np
import  pandas as pd
import geopy
from geopy.geocoders import Nominatim

In [6]:
# parse the wiki page
url = 'https://en.wikipedia.org/wiki/Boroughs_of_Amsterdam#List_of_boroughs'
df = pd.read_html(url)
df = df[1]
df.drop('Location (in green)', axis = 1, inplace = True)
df.rename(columns = {'Area':'areaKm2','Population density' : 'densityPerkm2'}, inplace = True)
df['areaKm2'] =  df['areaKm2'].apply(lambda x: x.split(' ')[0])
df['densityPerkm2'] = df['densityPerkm2'].apply(lambda x: x.split('/')[0])
df['densityPerkm2'] = df['densityPerkm2'].apply(lambda x: ''.join(x.split(',')))
df['densityPerkm2'] = pd.to_numeric(df['densityPerkm2'])
df['areaKm2'] = pd.to_numeric(df['areaKm2'])
df.sort_values(by = ['densityPerkm2'], ascending = False, inplace = True)
df = df.reset_index(drop = True)
print(df)

                   Borough  areaKm2  Population  densityPerkm2  \
0                     West     9.89      143842          15252   
1         Centrum (Centre)     8.04       86422          13748   
2             Zuid (South)    17.41      144432           9349   
3              Oost (East)    30.56      135767           7635   
4     Nieuw-West(New West)    32.38      151677           4478   
5      Zuidoost(Southeast)    22.08       87854           4391   
6            Noord (North)    49.01       94766           2269   
7  Westpoort(West Gateway)    10.00         192             10   

                                      Neighbourhoods  
0  Frederik Hendrikbuurt, Houthaven, Spaarndammer...  
1  Binnenstad, Grachtengordel, Haarlemmerbuurt, J...  
2  Apollobuurt, Buitenveldert, Hoofddorppleinbuur...  
3  IJburg, Indische Buurt, Eastern Docklands, Oud...  
4  Geuzenveld, Nieuw Sloten, Oostoever, Osdorp, O...  
5    Bijlmermeer, Venserpolder, Gaasperdam, Driemond  
6  Banne Buiksloot, 

In [7]:
# split neighborhood into each row
data_neighborhood = []
data_borough = []
for idx in range(df.shape[0]):
    neighborhoods = df.iloc[idx]['Neighbourhoods']
    neighborhoods = neighborhoods.split(',')
    neighborhoods_list = [i.strip() for i in neighborhoods]
    borough_tmp = len(neighborhoods_list)*[df.iloc[idx]['Borough']]
    data_neighborhood.extend(neighborhoods_list)
    data_borough.extend(borough_tmp)
df_data = pd.DataFrame([data_borough,data_neighborhood])
df_data = df_data.transpose()
df_data.columns = ['Borough', 'Neighborhoods']

print(df_data.head(20))


             Borough          Neighborhoods
0               West  Frederik Hendrikbuurt
1               West              Houthaven
2               West      Spaarndammerbuurt
3               West      Staatsliedenbuurt
4               West         Zeeheldenbuurt
5               West             Westerpark
6               West            Kinkerbuurt
7               West          Overtoombuurt
8               West            De Baarsjes
9               West          Bos en Lommer
10              West          Kolenkitbuurt
11              West               Landlust
12              West             Sloterdijk
13  Centrum (Centre)             Binnenstad
14  Centrum (Centre)         Grachtengordel
15  Centrum (Centre)        Haarlemmerbuurt
16  Centrum (Centre)             Jodenbuurt
17  Centrum (Centre)                Jordaan
18  Centrum (Centre)               Kadijken
19  Centrum (Centre)                Lastage


In [8]:
# retrieve the coordination
coor_ll = []
for idx in range(df_data.shape[0]):
    geolocator = Nominatim(user_agent = 'Netherland_explorer')
    try:
        coor = geolocator.geocode(df_data.loc[idx]['Neighborhoods'])
        #coor = geolocator.geocode('Overtoombuurt')
        coor_ll.append([coor.latitude, coor.longitude])
    except:
        coor_ll.append([np.nan,np.nan])
print(coor_ll)


[[52.376955699999996, 4.87408475121028], [52.39337645, 4.881680240481273], [52.389662599999994, 4.87936892609182], [52.3802865, 4.870950694196747], [52.389329849999996, 4.888242227776295], [52.387236349999995, 4.871777328438663], [52.3691672, 4.866649434878931], [nan, nan], [52.3689257, 4.8563825], [52.3785206, 4.8487385], [52.3796239, 4.8414043], [52.379851, 4.858608466297305], [52.3871325, 4.8465234], [50.8492705, 5.6887558], [52.370836999999995, 4.885478190638034], [52.382441299999996, 4.887193084850383], [nan, nan], [52.3754157, 4.8810958], [52.3677527, 4.919543395257523], [52.371250450000005, 4.905507717552577], [52.37659135, 4.907560405017876], [52.371869000000004, 4.922875349226905], [52.366405, 4.913728577285194], [52.155884, 4.4876151], [52.3706669, 4.905258231575102], [nan, nan], [52.3607227, 4.887778], [52.348072599999995, 4.875559011765657], [52.3286468, 4.8735234], [nan, nan], [52.355760950000004, 4.876834631189791], [52.3542396, 4.896946171061886], [nan, nan], [52.34404, 

In [9]:
# append the above coordination to the df_data
coor_ll = np.array(coor_ll)
df_coor = pd.DataFrame(coor_ll)
df_ams = pd.concat([df_data, df_coor], axis = 1)
print(df_ams)
df_ams.to_csv('data.csv')

                    Borough                        Neighborhoods          0  \
0                      West                Frederik Hendrikbuurt  52.376956   
1                      West                            Houthaven  52.393376   
2                      West                    Spaarndammerbuurt  52.389663   
3                      West                    Staatsliedenbuurt  52.380286   
4                      West                       Zeeheldenbuurt  52.389330   
5                      West                           Westerpark  52.387236   
6                      West                          Kinkerbuurt  52.369167   
7                      West                        Overtoombuurt        NaN   
8                      West                          De Baarsjes  52.368926   
9                      West                        Bos en Lommer  52.378521   
10                     West                        Kolenkitbuurt  52.379624   
11                     West                         

## Discussion about data obtain


- There are some fields the geopy could not find the coordinations for latitude and longitude, so then i have to fill it by manually search each one. Howerver, that are not so many. And the final data will be attached!