In [1]:
import pandas as pd
import geopandas as gpd

### Data loading

#### Load SHP data

In [2]:
# Read the postal-code shapefile and reproject it to EPSG:4326 in one step.
# TODO: check which nomenclature (open question carried over from the original note).
gdf_ch = gpd.read_file('data/PLZO_PLZ.shp').to_crs('EPSG:4326')

In [3]:
# Report the (rows, columns) shape, then show the first five rows.
print(f"Shape: {gdf_ch.shape}")
gdf_ch.head()

Shape: (4133, 7)


Unnamed: 0,UUID,OS_UUID,STATUS,INAEND,PLZ,ZUSZIFF,geometry
0,{0072F991-E46D-447E-A3BE-75467DB57FFC},{281807DC-9C0B-4364-9A55-0E8956876194},real,nein,3920,0,"POLYGON ((7.57586 45.98819, 7.57660 45.98882, ..."
1,{C3D3316F-1DFE-468E-BFC5-5C2308743B4C},{F065D58C-3F88-46EF-9AA0-DA0A96766333},real,nein,3864,0,"POLYGON ((8.11446 46.54657, 8.11535 46.54715, ..."
2,{479E660B-A0A5-4297-AA66-FA62735EFDC6},{45243689-766B-4FFC-9A14-AF0D17AADE48},real,nein,1948,1,"POLYGON ((7.29419 45.92253, 7.29421 45.92344, ..."
3,{FDFBFFDF-11C2-4CC9-B903-EF17677388BF},{678407FD-30DD-4699-A2D7-FD3602AD9EF0},real,nein,7504,0,"POLYGON ((9.83195 46.36054, 9.83206 46.36083, ..."
4,{CB229C54-DF46-45A0-B75F-6E77240B4B03},{D4A72AA9-CF35-4F14-8AD4-03F2EDAC5BA1},real,nein,3984,2,"POLYGON ((8.11446 46.54657, 8.11497 46.54618, ..."


In [4]:
# Load the postal-code -> municipality sheet; the first 10 rows of the sheet are skipped.
npa_ch = pd.read_excel(
    'data/npa_suisse.xls',
    sheet_name="PLZ4 -> GDE - NPA4 -> COM",
    skiprows=10,
)

In [5]:
# Report the (rows, columns) shape, then show the first five rows.
print(f"Shape: {npa_ch.shape}")
npa_ch.head()

Shape: (4934, 6)


Unnamed: 0,PLZ4,%_IN_GDE,GDENR,KTKZ,GDENAMK,Unnamed: 5
0,1000,100.0,5586,VD,Lausanne,
1,1003,100.0,5586,VD,Lausanne,
2,1004,100.0,5586,VD,Lausanne,
3,1005,100.0,5586,VD,Lausanne,
4,1006,99.9,5586,VD,Lausanne,


### Data cleaning

**Note on the length of each file**

SHP file:
- 4133 lines
- 4079 unique PLZ + ZUSZIFF
- 3196 unique PLZ
- 470 PLZ in VD
- 334 unique PLZ in VD

NPA file:  
- 4934 lines
- 3186 unique PLZ
- 498 PLZ in VD
- 334 unique PLZ in VD

In [6]:
# Series named 'NPA' holding each distinct PLZ4 value whose canton code (KTKZ) is 'VD'
npa_vd = (
    npa_ch
    .loc[npa_ch['KTKZ'] == 'VD', 'PLZ4']
    .drop_duplicates()
    .rename('NPA')
)

In [7]:
# GeoDataFrame restricted to the postal codes listed in npa_vd (Vaud), keeping
# only the PLZ, ZUSZIFF and geometry columns, with 'PLZ' renamed to 'NPA'.
# NOTE(review): the original note states that the (PLZ, ZUSZIFF) pair is unique
# in this subset — confirm, since nothing here checks it.
# Rows and columns are selected in a single .loc call and the rename returns a
# new object, avoiding chained indexing followed by an inplace mutation of the slice.
gdf_vd = (
    gdf_ch
    .loc[gdf_ch['PLZ'].isin(npa_vd), ['PLZ', 'ZUSZIFF', 'geometry']]
    .rename(columns={'PLZ': 'NPA'})
)

### Write GeoJSON data

In [9]:
# Export the Vaud subset to a GeoJSON file in the current working directory.
gdf_vd.to_file(
    "vaud_npa.geojson",
    driver="GeoJSON",
)