In [1]:
import io
import math
import os
import time
import warnings

import geopandas as gpd
import numpy as np
import osmnx as ox
import pandas as pd
import requests

In [2]:
%%time
# Web-scrape the population table (plain HTML <table>) for the Paris urban
# area ("aire urbaine", as defined by the French statistics bureau INSEE)
# from citypopulation.de.

city = 'Paris'
url = 'https://citypopulation.de/en/france/paris/admin/'

# Bound the request so a stalled connection cannot hang the notebook, and
# fail loudly on an HTTP error instead of handing an error page to the parser.
response = requests.get(url, timeout=60)
response.raise_for_status()
html = response.content

# Wrap the raw bytes in a file-like object: passing literal HTML directly to
# read_html is deprecated in recent pandas releases.
df_list = pd.read_html(io.BytesIO(html))

# The first table on the page is the one holding the population figures.
df = df_list[0]
df

Wall time: 1.33 s


Unnamed: 0,Name,Status,PopulationEstimate2007-01-01,PopulationEstimate2015-01-01,PopulationEstimate2018-01-01,Unnamed: 5
0,Ablon-sur-Seine,Commune,5155,5527,5818,→
1,Ablon-sur-Seine,Commune,5155,5527,5818,→
2,Achères,Commune,19789,21053,21098,→
3,Achères Gare,Statistical Area,139,198,235,→
4,Centre Ville,Statistical Area,2066,2358,2761,→
...,...,...,...,...,...,...
4708,Gambetta-Brossolette-GARE,Statistical Area,4224,4315,4367,→
4709,Gros Bois-Mare Armée-Sablière,Statistical Area,3558,3818,3730,→
4710,Rives de l'Yerres-Tournelles,Statistical Area,4555,4473,4493,→
4711,Taillis-Garenne,Statistical Area,4063,4139,4116,→


In [3]:
%%time
# Keep only communes and municipal arrondissements; rows of any other status
# (e.g. the 'Statistical Area' rows) are discarded.
is_municipality = df['Status'].isin(['Commune', 'Municipal Arrondissement'])

# Drop exact duplicate rows (a commune that consists of a single statistical
# area is listed twice with the 'Commune' status).
unités_urbaine = df[is_municipality].drop_duplicates()

# Rename the columns; the trailing arrow column carries no data and is dropped
# below, while the population columns are kept because they can be useful later.
unités_urbaine.columns = ['name','status','pop2007','pop2015','pop2018','no_name']
print('Population: ',city,unités_urbaine['pop2018'].sum())
unités_urbaine = unités_urbaine.iloc[:,:5]

# Build one geocoding query per municipality. 'Marolles-en-Brie' exists twice
# in Ile-de-France, so it is excluded from the generic queries and appended
# with its département to select the right one.
other_names = unités_urbaine.loc[unités_urbaine['name'] != 'Marolles-en-Brie', 'name']
place_queries = list(other_names + ', Ile-de-France')
place_queries.append('Marolles-en-Brie, Val-de-Marne, Ile-de-France')

# Other cells look the list up under a variable named after the city (here
# `Paris`). Publish it explicitly through globals() rather than by writing
# into locals(), whose mutation is not a documented way to create variables.
globals()[city] = place_queries
unités_urbaine

Population:  Paris 10829408
Wall time: 4.99 ms


Unnamed: 0,name,status,pop2007,pop2015,pop2018
0,Ablon-sur-Seine,Commune,5155,5527,5818
2,Achères,Commune,19789,21053,21098
12,Alfortville,Commune,44116,44410,44287
28,Andilly,Commune,2449,2572,2581
30,Andrésy,Commune,12501,12403,13078
...,...,...,...,...,...
4645,Viry-Châtillon,Commune,31249,30831,30706
4658,Vitry-sur-Seine,Commune,83650,92531,94649
4696,Voisins-le-Bretonneux,Commune,12153,11378,10921
4702,Wissous,Commune,5112,7687,7301


In [89]:
%%time
# Show floats with one decimal instead of scientific notation (affects how the
# projected coordinates are displayed).
pd.set_option('display.float_format', lambda x: '%.1f' % x)
start_time = time.time()

# OSM tag values to extract (s) and, position by position, the OSM key each
# value belongs to (m). The two lists are parallel and must stay aligned.
s = ['restaurant','cafe','college','university','pharmacy','dentist','doctors','cinema','theatre','supermarket',
            'bakery','butcher','greengrocer']
m = ['amenity','amenity','amenity','amenity','amenity','amenity','amenity','amenity','amenity','shop','shop','shop','shop']

# Projected CRS used for centroids and for the metric X/Y columns consumed in R
# (EPSG:3043; per the EPSG registry this is ETRS89 / UTM zone 31N, in metres).
PROJECTED_EPSG = 3043

# All outputs go to the POI folder of the current city; create it if missing
# so the first to_file call does not fail on a fresh machine.
poi_dir = 'D:/EconNet/'+str(city)+'/POI/'
os.makedirs(poi_dir, exist_ok=True)

# Newer OSMnx releases renamed geometries_from_place to features_from_place;
# use the new name when it exists and fall back to the old one otherwise.
fetch_features = getattr(ox, 'features_from_place', None) or ox.geometries_from_place

amenity_frames = []

# Silence library warnings only while this cell runs. The previous global
# filterwarnings('ignore') stayed active for the rest of the session.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')

    # For every amenity/shop, extract from OSM
    for key, value in zip(m, s):
        # globals()[city] is the list of place queries built in an earlier cell.
        pois = fetch_features(globals()[city], tags={key: value})

        # Subset and rename columns; the tag column is always called 'amenity'
        # in the output, also for shops. Copy so the assignments below act on
        # an independent frame.
        pois = pois[[key,'name','geometry']].copy()
        pois.columns = ['amenity','name','geometry']
        out_path = poi_dir+'OSM_'+key+'_'+value+'.shp'
        print('path: '+out_path)

        # Take the centroid of each feature for simpler computation in R.
        # Centroids are computed in the projected CRS (a centroid taken on
        # lon/lat degrees is what GeoPandas warns about), projecting once
        # instead of twice.
        centroids_m = pois['geometry'].to_crs(PROJECTED_EPSG).centroid

        # Keep the geometry column in the frame's original CRS, and expose the
        # projected X and Y coordinates as plain columns for R.
        pois['geometry'] = centroids_m.to_crs(pois.crs)
        pois['geom_x_m'] = centroids_m.x
        pois['geom_y_m'] = centroids_m.y

        # Save the specific amenity as shapefile
        pois.to_file(out_path)

        # Keep a variable named after the amenity (restaurant, cafe, ...) for
        # any other cell that relies on it.
        globals()[value] = pois

        # Collect for the master table; concatenating once after the loop
        # avoids re-copying the growing table on every iteration.
        amenity_frames.append(pois)
        print(value,'done',round((time.time() - start_time) / 60,2),' mns')

    # Build and export the master table of all amenities/shops. The path
    # follows `city`, consistent with the per-amenity files (identical to the
    # previous hard-coded path when city == 'Paris').
    Paris_amenities = pd.concat(amenity_frames)
    Paris_amenities.to_file(poi_dir+'amenities_shops.shp')

Paris_amenities

path: D:/EconNet/Paris/POI/OSM_amenity_restaurant.shp
restaurant done 0.25  mns
path: D:/EconNet/Paris/POI/OSM_amenity_cafe.shp
cafe done 0.44  mns
path: D:/EconNet/Paris/POI/OSM_amenity_college.shp
college done 0.61  mns
path: D:/EconNet/Paris/POI/OSM_amenity_university.shp
university done 0.81  mns
path: D:/EconNet/Paris/POI/OSM_amenity_pharmacy.shp
pharmacy done 1.01  mns
path: D:/EconNet/Paris/POI/OSM_amenity_dentist.shp
dentist done 1.19  mns
path: D:/EconNet/Paris/POI/OSM_amenity_doctors.shp
doctors done 1.36  mns
path: D:/EconNet/Paris/POI/OSM_amenity_cinema.shp
cinema done 1.53  mns
path: D:/EconNet/Paris/POI/OSM_amenity_theatre.shp
theatre done 1.7  mns
path: D:/EconNet/Paris/POI/OSM_shop_supermarket.shp
supermarket done 1.88  mns
path: D:/EconNet/Paris/POI/OSM_shop_bakery.shp
bakery done 2.07  mns
path: D:/EconNet/Paris/POI/OSM_shop_butcher.shp
butcher done 2.25  mns
path: D:/EconNet/Paris/POI/OSM_shop_greengrocer.shp
greengrocer done 2.42  mns
Wall time: 2min 29s


Unnamed: 0_level_0,Unnamed: 1_level_0,amenity,name,geometry,geom_x_m,geom_y_m
element_type,osmid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
node,244497303,restaurant,Quai 35,POINT (2.47726 48.61734),461473.2,5385050.8
node,252816603,restaurant,Sinorama,POINT (2.36017 48.82636),453038.9,5408351.1
node,252816604,restaurant,Phở Banh Cuon 14,POINT (2.35953 48.82636),452991.9,5408351.7
node,253748421,restaurant,La Cabane,POINT (2.23364 48.70124),443612.2,5394528.5
node,260282170,restaurant,Casa Mario,POINT (2.26782 48.82349),446257.5,5408093.3
node,...,...,...,...,...,...
node,10024004174,greengrocer,Au Marché Fruité,POINT (2.19477 48.88968),440973.4,5415504.8
way,85233774,greengrocer,Univers Primeurs,POINT (2.09779 49.01191),434026.0,5429172.1
way,102452935,greengrocer,Gepi - Les Halles,POINT (2.11417 49.05216),435276.4,5433632.2
way,382037582,greengrocer,L'Entrepot,POINT (1.90211 49.01098),419715.8,5429257.2
