In [8]:
from pathlib import Path
import pandas as pd
from datetime import datetime
from municipality_mapping import (
    MunicipalityCodeMapper, MunicipalityNameMatcher, CONFIG
)

In [2]:
# Instantiate the code mapper and the name matcher imported from municipality_mapping.
# The recorded output of this cell shows two "Updating Gemeindestände" progress bars
# (roughly 12 s and 4 s), so expect this cell to take a while.
# NOTE(review): no visible cell uses `mnm` under that name — confirm it is still needed.
mcm = MunicipalityCodeMapper()
mnm = MunicipalityNameMatcher(config=CONFIG)

Found 89 Gemeindestände since 01-01-1981! Latest: 06-04-2025


Updating Gemeindestände: 100%|██████████| 89/89 [00:12<00:00,  6.95it/s]


Found 34 Gemeindestände since 01-01-2010! Latest: 06-04-2025


Updating Gemeindestände: 100%|██████████| 34/34 [00:04<00:00,  7.78it/s]


In [3]:

# Root folders for the raw inputs and the mapped results
SOURCE_PATH = Path('../data/raw')
TARGET_PATH = Path('../data/results')

# Dataset sub-folder and the file inside it that will be mapped
FOLDER = 'startupdata_mun'
FILE_NAME = 'startupdata_mun.csv'

# Ensure the dataset's results folder exists before anything is written to it
TARGET_PATH.joinpath(FOLDER).mkdir(exist_ok=True, parents=True)

In [4]:
def rename_original_columns(df, code_col, name_col, origin):
    """Return a copy of ``df`` with its code and name columns renamed.

    Args:
        df: DataFrame holding the original columns.
        code_col: Current name of the municipality-code column.
        name_col: Current name of the municipality-name column.
        origin: Label appended to the new column names.

    Returns:
        A new DataFrame in which ``code_col`` is called
        ``bfs_gmde_nummer_{origin}`` and ``name_col`` is called
        ``bfs_gmde_name_{origin}``. The input frame is left untouched.
    """
    new_names = {}
    new_names[code_col] = f"bfs_gmde_nummer_{origin}"
    new_names[name_col] = f"bfs_gmde_name_{origin}"
    return df.rename(columns=new_names)

In [5]:
# Load the data to map, choosing the reader from the file extension
input_file = SOURCE_PATH / FOLDER / FILE_NAME
suffix = input_file.suffix.lstrip('.').lower()
if suffix == 'csv':
    # NOTE(review): with this encoding the displayed town names come out garbled
    # (e.g. "BeurnevÃÂ©sin" in the mapped frame) — confirm the file's real encoding.
    df = pd.read_csv(input_file, encoding='ISO-8859-1')
elif suffix == 'xlsx':
    df = pd.read_excel(input_file)
else:
    # Fail loudly: merely printing would let the next line run against an
    # undefined (or stale, left-over) `df`.
    raise ValueError(f'file .{suffix} format not supported!')
df.head()

Unnamed: 0,registry_office_canton,year,town,Gemeindenummer,mun_all,mun_all_Growth,mun_all_Sole,mun_all_Corp,mun_green,mun_green_Growth,...,mun_femdom_Corp,mun_maldom,mun_maldom_Swissdom,mun_maldom_Swissdom_Growth,mun_maldom_Swissdom_Sole,mun_maldom_Swissdom_Corp,mun_maldom_Growth,mun_maldom_Sole,mun_maldom_Corp,mun_JointStockCorporation
0,ZH,2016,Aeugst am Albis,1,7,1,6,0,0,0,...,0,4,1,0,1,0,0,4,0,0
1,ZH,2017,Aeugst am Albis,1,4,3,1,0,1,1,...,0,2,2,2,0,0,2,0,0,0
2,LU,2018,Aeugst am Albis,1,2,2,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
3,ZH,2019,Aeugst am Albis,1,6,4,2,1,0,0,...,0,4,3,2,1,1,2,2,1,1
4,ZH,2020,Aeugst am Albis,1,8,6,2,2,0,0,...,0,1,1,1,0,0,1,0,0,2


In [6]:
df_mapped = await mcm.map_dataframe(df, code_column='Gemeindenummer', name_column='town', target=['01-01-2016', '01-01-2017', '01-01-2018', '01-01-2019', '01-01-2020', '01-01-2021', '01-01-2022', '01-01-2023', '01-01-2024'])

Found shared territories (Kommunanz): {5238}. Temporarily removing them for Gemeindestands search...
Inferred Gemeindestand: 06-04-2025
Mapped DataFrame successfully!


In [7]:
# Display the mapped frame; the recorded output is the truncated head/tail view
# with one bfs_gmde_code_<date> column per target date.
df_mapped

Unnamed: 0,registry_office_canton,year,town,bfs_gmde_code_06-04-2025,mun_all,mun_all_Growth,mun_all_Sole,mun_all_Corp,mun_green,mun_green_Growth,...,mun_JointStockCorporation,bfs_gmde_code_01-01-2016,bfs_gmde_code_01-01-2017,bfs_gmde_code_01-01-2018,bfs_gmde_code_01-01-2019,bfs_gmde_code_01-01-2020,bfs_gmde_code_01-01-2021,bfs_gmde_code_01-01-2022,bfs_gmde_code_01-01-2023,bfs_gmde_code_01-01-2024
0,ZH,2016,Aeugst am Albis,1,7,1,6,0,0,0,...,0,1,1,1,1,1,1,1,1,1
1,ZH,2017,Aeugst am Albis,1,4,3,1,0,1,1,...,0,1,1,1,1,1,1,1,1,1
2,LU,2018,Aeugst am Albis,1,2,2,0,1,0,0,...,1,1,1,1,1,1,1,1,1,1
3,ZH,2019,Aeugst am Albis,1,6,4,2,1,0,0,...,1,1,1,1,1,1,1,1,1,1
4,ZH,2020,Aeugst am Albis,1,8,6,2,2,0,0,...,2,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20943,JU,2022,BeurnevÃÂ©sin,6812,5,2,3,0,0,0,...,0,6775,6775,6775,6775,6775,6775,6775,6775,6812
20944,JU,2023,Bonfol,6812,3,2,1,0,0,0,...,0,6773,6773,6773,6773,6773,6773,6773,6773,6812
20945,JU,2023,Bonfol,6812,3,2,1,0,0,0,...,0,6775,6775,6775,6775,6775,6775,6775,6775,6812
20946,JU,2024,BeurnevÃÂ©sin,6812,3,1,2,0,0,0,...,0,6773,6773,6773,6773,6773,6773,6773,6773,6812


In [9]:
# Save the mapped data into the results folder, in the same format as the input file.
# The output name is "<today>_<input stem>_mapped.<suffix>".
suffix = Path(FILE_NAME).suffix.lstrip('.').lower()
# Double quotes outside, single quotes inside: reusing the same quote type inside an
# f-string expression is a SyntaxError on Python versions before 3.12.
output_stem = f"{datetime.today().strftime('%Y-%m-%d')}_{Path(FILE_NAME).stem}_mapped"
if suffix == 'csv':
    df_mapped.to_csv(TARGET_PATH / FOLDER / f'{output_stem}.csv', index=False)
elif suffix == 'xlsx':
    df_mapped.to_excel(TARGET_PATH / FOLDER / f'{output_stem}.xlsx', index=False)
else:
    # Fail loudly instead of printing, so a run that saved nothing cannot pass unnoticed
    raise ValueError(f'file .{suffix} format not supported!')

In [None]:
mapping = await mcm.create_multi_mapping(origin='01-01-1984', targets=['01-01-2016', '01-01-2017', '01-01-2018', '01-01-2019', '01-01-2020', '01-01-2021', '01-01-2022', '01-01-2023', '01-01-2024'])

Creating the mapping. This might take some time...


In [7]:
# Show the rows that have no value in the 01-01-2016 code column.
# NOTE(review): the df_mapped displayed earlier in this notebook has columns named
# bfs_gmde_code_<date>, not bfs_gmde_nummer_<date>, and the recorded output here
# belongs to a different dataset — confirm this cell still applies.
df_mapped.loc[df_mapped['bfs_gmde_nummer_01-01-2016'].isna()]

Unnamed: 0,bfs_gmde_nummer_01-01-1984,gemeinden,Stimmberechtigte,Abgegebene Stimmen,Gültige Stimmen,JA,bfs_gmde_nummer_01-01-2016,bfs_gmde_nummer_01-01-2017,bfs_gmde_nummer_01-01-2018,bfs_gmde_nummer_01-01-2019,bfs_gmde_nummer_01-01-2020,bfs_gmde_nummer_01-01-2021,bfs_gmde_nummer_01-01-2022,bfs_gmde_nummer_01-01-2023,bfs_gmde_nummer_01-01-2024
1861,4495,Hohentannen,348,207,203,39,,,,,,,,,
1866,4545,Diessenhofen,1635,718,709,319,,,,,,,,,
1871,4590,Hüttlingen,396,280,274,100,,,,,,,,,
1873,4605,Neunforn,487,295,291,123,,,,,,,,,
2907,9120,BS-Auslandschweizer,224,0,0,0,,,,,,,,,
2908,9161,AI-Korrespondenzweg,0,70,70,50,,,,,,,,,
2909,9212,TI-Korrespondenzweg,0,390,378,299,,,,,,,,,
2910,9252,GE-Korrespondenzweg,0,6,6,4,,,,,,,,,


In [None]:
# Display df_mapped.
# NOTE(review): the recorded output is a bare coroutine object, which suggests the
# map_dataframe call was not awaited when this cell was last run.
df_mapped

<coroutine object GemeindeMapper.map_dataframe at 0x118cb4940>

In [80]:
# Keep only the rows whose combined label contains '......', then split that label
# into a numeric code (characters 6-9) and a name (everything from character 10 on).
label_col = 'Kanton (-) / Bezirk (>>) / Gemeinde (......)'
is_municipality = df[label_col].str.contains('......', regex=False)
# Built as one chain on a fresh frame: assigning columns onto a filtered slice
# triggers SettingWithCopyWarning, and inplace drops mutate state across cells.
df = (
    df.loc[is_municipality]
    .assign(
        gmde_nr=lambda d: d[label_col].str[6:10].astype(int),
        gmde_name=lambda d: d[label_col].str[10:],
    )
    .drop(columns=[label_col])
)

# One subset per year of interest
df_1980 = df[df.Jahr == 1980]
df_1990 = df[df.Jahr == 1990]
df_2000 = df[df.Jahr == 2000]

In [88]:
# Continue with the 1980 subset; from here on `df` refers to that subset only.
df = df_1980

In [89]:
# Try to automatically determine the gemeindestand of the data
# It can happen that no gemeindestand can be found nor inferred, then you have to provide it yourself
# NOTE(review): `gm` is not defined in any cell visible here (the mapper created at the
# top is `mcm`) — this cell will raise NameError on a fresh kernel unless `gm` is defined elsewhere.
origin = gm.find_gemeindestand(df, 'gmde_nr')

Found Gemeindestand: 01-10-2000.


In [59]:
# Define the target gemeindestände to create the mapping
targets = ['01-01-2016', '01-01-2017', '01-01-2018', '01-01-2019', '01-01-2020', '01-01-2021', '01-01-2022', '01-01-2023', '01-01-2024']

mapping = await gm.create_multi_mapping(origin, targets)
mapping

Creating the mapping. This might take some time...


Unnamed: 0,bfs_gmde_nummer_01-10-2000,bfs_gmde_name_01-10-2000,bfs_gmde_nummer_01-01-2016,bfs_gmde_name_01-01-2016,bfs_gmde_nummer_01-01-2017,bfs_gmde_name_01-01-2017,bfs_gmde_nummer_01-01-2018,bfs_gmde_name_01-01-2018,bfs_gmde_nummer_01-01-2019,bfs_gmde_name_01-01-2019,bfs_gmde_nummer_01-01-2020,bfs_gmde_name_01-01-2020,bfs_gmde_nummer_01-01-2021,bfs_gmde_name_01-01-2021,bfs_gmde_nummer_01-01-2022,bfs_gmde_name_01-01-2022,bfs_gmde_nummer_01-01-2023,bfs_gmde_name_01-01-2023,bfs_gmde_nummer_01-01-2024,bfs_gmde_name_01-01-2024
0,3501,Alvaschein,3542,Albula/Alvra,,,,,,,,,,,,,,,,
1,3403,Ganterschwil,3395,Bütschwil-Ganterschwil,,,,,,,,,,,,,,,,
2,3523,Wiesen (GR),3851,Davos,3851.0,Davos,3851.0,Davos,3851.0,Davos,3851.0,Davos,3851.0,Davos,3851.0,Davos,3851.0,Davos,3851.0,Davos
3,3522,Filisur,3522,Filisur,3522.0,Filisur,3544.0,Bergün Filisur,,,,,,,,,,,,
4,3521,Bergün/Bravuogn,3521,Bergün/Bravuogn,3521.0,Bergün/Bravuogn,3544.0,Bergün Filisur,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2927,2200,Grolley,2200,Grolley,2200.0,Grolley,2200.0,Grolley,2200.0,Grolley,2200.0,Grolley,2200.0,Grolley,2200.0,Grolley,2200.0,Grolley,2200.0,Grolley
2928,2262,Gurmels,2262,Gurmels,2262.0,Gurmels,2262.0,Gurmels,2262.0,Gurmels,2262.0,Gurmels,2262.0,Gurmels,2262.0,Gurmels,2262.0,Gurmels,2262.0,Gurmels
2929,4545,Diessenhofen,4545,Diessenhofen,4545.0,Diessenhofen,4545.0,Diessenhofen,4545.0,Diessenhofen,4545.0,Diessenhofen,4545.0,Diessenhofen,4545.0,Diessenhofen,4545.0,Diessenhofen,4545.0,Diessenhofen
2930,6073,Grafschaft,6073,Grafschaft,6077.0,Goms,,,,,,,,,,,,,,


In [90]:
# Rename the code/name columns to bfs_gmde_nummer_{origin} / bfs_gmde_name_{origin}
df = rename_original_columns(df, 'gmde_nr', 'gmde_name', origin)

# Left-merge the mapping on the origin code. Both frames carry an origin name column,
# so pandas suffixes them _x (data) and _y (mapping); only the mapping's one is kept.
origin_name_col = f'bfs_gmde_name_{origin}'
df_merged = (
    df.merge(mapping, on=[f'bfs_gmde_nummer_{origin}'], how='left')
    .drop(columns=[f'{origin_name_col}_x'])
    .rename(columns={f'{origin_name_col}_y': origin_name_col})
)

# Double quotes outside, single quotes inside: reusing the same quote type inside an
# f-string expression is a SyntaxError on Python versions before 3.12.
output_name = f"{datetime.today().strftime('%Y-%m-%d')}_{Path(FILE_NAME).stem}_mapped.xlsx"
df_merged.to_excel(TARGET_PATH / FOLDER / output_name, index=False)

In [None]:
# Repair mis-encoded characters in the town names and write the file back.
# NOTE(review): this reads and overwrites 'startupdata_mun.csv' in the current working
# directory rather than SOURCE_PATH / FOLDER / FILE_NAME — confirm which copy is meant.
df = pd.read_csv('startupdata_mun.csv')

# Garbled sequence -> intended character
town_fixes = {
    'Ãª': 'ê',
    'Ã©': 'é',
    'Ã¨': 'è',
    'Ã®': 'î',
    'Ã¢': 'â',
    'Ã´': 'ô',
    'Ã¼': 'ü',
    'Ã¶': 'ö',
    'Ã¤': 'ä',
}
town = df['town']
for broken, fixed in town_fixes.items():
    # regex=False: these are literal substrings, and the default for `regex`
    # differs between pandas versions.
    town = town.str.replace(broken, fixed, regex=False)
df['town'] = town

# Save the file with proper encoding (e.g., UTF-8)
df.to_csv('startupdata_mun.csv', encoding='utf-8', index=False)

# Name-match on a sample of at most 1000 rows. The sample is seeded so reruns pick
# the same rows, and capped so frames shorter than 1000 rows do not raise.
df = df.sample(n=min(1000, len(df)), random_state=42)
# `mnm` is the MunicipalityNameMatcher created at the top of the notebook;
# `name_matcher` is not defined in any visible cell.
matched_df = mnm.match_dataframe(df, query_column='town', threshold=0.5)

'2024-10-16'

In [None]:
df_mapped = await code_mapper.map_dataframe(df, 'Gemeindenummer', 'town', origin='01-01-2024', target=['01-01-2016', '01-01-2017', '01-01-2018', '01-01-2019', '01-01-2020', '01-01-2021', '01-01-2022', '01-01-2023', '01-01-2024'], return_names=True)

In [None]:
# Write the mapped frame to 'startupdata_mun_mapped.csv' in the current working
# directory, without the index column.
# NOTE(review): unlike the dated export into TARGET_PATH / FOLDER, this path is
# relative to the working directory — confirm that is intended.
df_mapped.to_csv('startupdata_mun_mapped.csv', index=False)