# Add Language

The CSV data of the [Amtliches Verzeichnis der Strassen](https://www.cadastre.ch/de/services/service/registry/street.html) does not contain information about the language of the street name label. As this information is available in the File Geodatabase of the Amtliches Verzeichnis der Strassen, it is joined to the street names in this notebook.

---
## Download and load with GeoPandas
Download the File Geodatabase and load the data with fiona into a GeoDataFrame.

In [1]:
import urllib.request
import pandas as pd
    
# Download location of the street directory as a zipped File Geodatabase.
url = 'https://data.geo.admin.ch/ch.swisstopo.amtliches-strassenverzeichnis/gdb/2056/ch.swisstopo.amtliches-strassenverzeichnis.zip'
# urlretrieve returns (local_path, headers). Despite its name, `filehandle` is the
# path of the downloaded temporary file, not an open file object.
# The headers are bound to `_headers` rather than `_`: assigning to `_` in
# IPython/Jupyter stops the kernel from updating its "last output" variable.
filehandle, _headers = urllib.request.urlretrieve(url)

In [2]:
from zipfile import ZipFile

# Unpack the downloaded archive into ./pure_str.
# The handle is named `archive` instead of `zip`: a notebook cell runs at module
# level, so `as zip` would rebind the builtin zip() to a (closed) ZipFile object
# for every cell executed afterwards.
with ZipFile(filehandle, 'r') as archive:
    archive.extractall("pure_str")

In [3]:
import geopandas
# Read only the PURE_STN layer of the extracted geodatabase.
pureStn = geopandas.read_file(
    "pure_str/pure_str.gdb",
    layer="PURE_STN",
)

In [4]:
import shutil
# The layer is already loaded into `pureStn`, so remove the extracted folder again.
shutil.rmtree(path="pure_str")

In [5]:
# Iterating a (Geo)DataFrame yields its column labels; list them one per line.
for column_name in pureStn.columns:
    print(column_name)

STN_ID
STR_ESID
STN_TEXT
STN_SHORT
STN_INDEX
STN_LANG
geometry


In [6]:
# Preview the first five rows of the loaded layer.
pureStn.head(n=5)

Unnamed: 0,STN_ID,STR_ESID,STN_TEXT,STN_SHORT,STN_INDEX,STN_LANG,geometry
0,761294.0,10258316.0,Eggwald,Eggwald,Egg,0,
1,469894.0,10023770.0,Wiedenweg,Wiedenweg,Wie,0,
2,576880.0,10179192.0,Wuhrbärgli,Wuhrbärgli,Wuh,0,
3,747045.0,10250501.0,Hüethütte Unter Trübsee,Hüethütte Unter Trübsee,Hüe,0,
4,565831.0,10163108.0,Heimstenstich,Heimstenstich,Hei,0,


In [7]:
# Order by language code so both ends of the code range show up in the preview.
pureStn.sort_values(by="STN_LANG")

Unnamed: 0,STN_ID,STR_ESID,STN_TEXT,STN_SHORT,STN_INDEX,STN_LANG,geometry
0,761294.0,10258316.0,Eggwald,Eggwald,Egg,0,
143510,746750.0,10250271.0,Tüfenwiesstrasse,Tüfenwiesstr.,Tüf,0,
143511,525093.0,10097218.0,Käsereiweg,Käsereiweg,Käs,0,
143512,689689.0,10226376.0,Steinenbachweg,Steinenbachweg,Ste,0,
143513,690232.0,10226919.0,Hirschenengasse,Hirschenengasse,Hir,0,
...,...,...,...,...,...,...,...
171976,761115.0,10258223.0,Via Cunel,Via Cunel,Cun,3,
171978,761087.0,10258195.0,Via Cavorgia,Via Cavorgia,Cav,3,
171979,761061.0,10258167.0,Via Vin Val,Via Vin Val,Vin,3,
171965,761073.0,10258182.0,Via Scalinada,Via Scalinada,Sca,3,


---
## Create the lookup
The language information is encoded as numbers. These will be replaced by language codes.

In [8]:
# Count how often each language code occurs and turn the counts into a small frame.
STN_LANG = pureStn["STN_LANG"].value_counts().reset_index()

STN_LANG.head(n=5)

Unnamed: 0,index,STN_LANG
0,0,165949
1,1,43434
2,2,9991
3,3,2332


In [9]:
# Numeric STN_LANG codes -> two-letter language codes.
STN_LANG_CODES = {0: 'de', 1: 'fr', 2: 'it', 3: 'rm'}

# Translate the whole column in one pass and assign the result back.
# Writing strings into the numeric column through boolean .loc masks is an
# upcasting setitem, which pandas has deprecated (PDEP-6); replace() builds a
# new column instead. Codes that are not in the mapping are left untouched, as
# before, and re-running the cell on already translated values is a no-op.
pureStn['STN_LANG'] = pureStn['STN_LANG'].replace(STN_LANG_CODES)

In [10]:
# Check that STN_LANG now holds language codes instead of numbers.
pureStn.head(n=5)

Unnamed: 0,STN_ID,STR_ESID,STN_TEXT,STN_SHORT,STN_INDEX,STN_LANG,geometry
0,761294.0,10258316.0,Eggwald,Eggwald,Egg,de,
1,469894.0,10023770.0,Wiedenweg,Wiedenweg,Wie,de,
2,576880.0,10179192.0,Wuhrbärgli,Wuhrbärgli,Wuh,de,
3,747045.0,10250501.0,Hüethütte Unter Trübsee,Hüethütte Unter Trübsee,Hüe,de,
4,565831.0,10163108.0,Heimstenstich,Heimstenstich,Hei,de,


---
## Join the language to streetnames.csv

In [11]:
# Load the existing street name table (semicolon-separated, UTF-8 with BOM).
streetnames = pd.read_csv(
    'streetnames.csv',
    sep=';',
    encoding='UTF-8-SIG',
)

In [12]:
# Preview the first five rows of the street name table.
streetnames.head(n=5)

Unnamed: 0.1,Unnamed: 0,STR_ESID,STN_LABEL,COM_FOSNR,COM_NAME,COM_CANTON,STR_OFFICIAL,STR_EASTING,STR_NORTHING,STN_LABEL_NO_BI,STN_LABEL_NO_TERMS,STR_TERMS,STR_PREPS,STN_LABEL_FINAL
0,1,10023770,Wiedenweg,2786,Grellingen,BL,True,2610733.0,1254311.0,Wiedenweg,Wieden,weg,,Wieden
1,2,10179192,Wuhrbärgli,2788,Liesberg,BL,True,2598709.0,1249640.0,Wuhrbärgli,Wuhrbärgli,,,Wuhrbärgli
2,9,10140563,Emanuelenweg,2829,Liestal,BL,True,2623078.0,1257558.0,Emanuelenweg,Emanuelen,weg,,Emanuelen
3,13,10069457,Löhrweg,2850,Känerkinden,BL,True,2630229.0,1251411.0,Löhrweg,Löhr,weg,,Löhr
4,15,10096235,Brunngasse,2833,Seltisberg,BL,True,2621406.0,1256852.0,Brunngasse,Brunn,gasse,,Brunn


In [13]:
# Keep only the join key and the language code; the other columns are not needed.
lang_columns = ["STR_ESID", "STN_LANG"]
streetnamesLangTemp = pureStn[lang_columns]

In [14]:
# Attach the language code to every street by looking it up via STR_ESID.
# NOTE(review): the recorded .info() output lists 172265 rows whose last index
# label is 171878, so some STR_ESID values appear to match several PURE_STN rows
# and those streets are repeated in the result - confirm this is intended.
lang_by_esid = streetnamesLangTemp.set_index('STR_ESID')
streetnamesLang = streetnames.join(lang_by_esid, on='STR_ESID')

In [15]:
# Drop the "Unnamed: 0" column that was read from streetnames.csv (presumably a
# row index saved by an earlier to_csv call - verify against the producing notebook).
# Reassigning instead of inplace=True, together with errors="ignore", lets the
# cell be re-run without a KeyError once the column is already gone.
streetnamesLang = streetnamesLang.drop(columns=["Unnamed: 0"], errors="ignore")

In [16]:
# Preview the joined table; STN_LANG should be the last column.
streetnamesLang.head(n=5)

Unnamed: 0,STR_ESID,STN_LABEL,COM_FOSNR,COM_NAME,COM_CANTON,STR_OFFICIAL,STR_EASTING,STR_NORTHING,STN_LABEL_NO_BI,STN_LABEL_NO_TERMS,STR_TERMS,STR_PREPS,STN_LABEL_FINAL,STN_LANG
0,10023770,Wiedenweg,2786,Grellingen,BL,True,2610733.0,1254311.0,Wiedenweg,Wieden,weg,,Wieden,de
1,10179192,Wuhrbärgli,2788,Liesberg,BL,True,2598709.0,1249640.0,Wuhrbärgli,Wuhrbärgli,,,Wuhrbärgli,de
2,10140563,Emanuelenweg,2829,Liestal,BL,True,2623078.0,1257558.0,Emanuelenweg,Emanuelen,weg,,Emanuelen,de
3,10069457,Löhrweg,2850,Känerkinden,BL,True,2630229.0,1251411.0,Löhrweg,Löhr,weg,,Löhr,de
4,10096235,Brunngasse,2833,Seltisberg,BL,True,2621406.0,1256852.0,Brunngasse,Brunn,gasse,,Brunn,de


In [17]:
# Print row count, dtypes and non-null counts to check the join result
# (in particular that STN_LANG has no missing values).
streetnamesLang.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 172265 entries, 0 to 171878
Data columns (total 14 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   STR_ESID            172265 non-null  int64  
 1   STN_LABEL           172265 non-null  object 
 2   COM_FOSNR           172265 non-null  int64  
 3   COM_NAME            172265 non-null  object 
 4   COM_CANTON          172265 non-null  object 
 5   STR_OFFICIAL        172265 non-null  bool   
 6   STR_EASTING         172265 non-null  float64
 7   STR_NORTHING        172265 non-null  float64
 8   STN_LABEL_NO_BI     172265 non-null  object 
 9   STN_LABEL_NO_TERMS  171391 non-null  object 
 10  STR_TERMS           140368 non-null  object 
 11  STR_PREPS           6818 non-null    object 
 12  STN_LABEL_FINAL     171079 non-null  object 
 13  STN_LANG            172265 non-null  object 
dtypes: bool(1), float64(2), int64(2), object(9)
memory usage: 18.6+ MB


In [18]:
# Write the enriched table back to the file it was read from, using the same
# separator and encoding.
# NOTE(review): the row index is written as well (no index=False), and the input
# file is overwritten, so running the notebook a second time starts from the
# already enriched file - confirm both are intended.
streetnamesLang.to_csv(
    'streetnames.csv',
    sep=';',
    encoding='UTF-8-SIG',
)