In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
from mapper import GemeindeMapper

# Input and output roots, relative to the notebook's working directory
SOURCE_PATH = Path('../raw')
TARGET_PATH = Path('../results')

# Dataset to process: the sub-folder used below both roots and the file inside it
FOLDER = 'RMA'
FILE_NAME = 'RMA.xlsx'

# Mapper instance shared by the cells below
gm = GemeindeMapper()

# Create the per-dataset output folder up front; a no-op when it already exists
TARGET_PATH.joinpath(FOLDER).mkdir(exist_ok=True, parents=True)

In [2]:
def rename_original_columns(df, code_col, name_col, origin):
    """Return a copy of ``df`` with the municipality columns renamed for ``origin``.

    Args:
        df: DataFrame holding the municipality number and name columns.
        code_col: Current name of the municipality-number column.
        name_col: Current name of the municipality-name column.
        origin: Value interpolated into the new column names.

    Returns:
        A new DataFrame in which ``code_col`` is called
        ``bfs_gmde_nummer_<origin>`` and ``name_col`` is called
        ``bfs_gmde_name_<origin>``; all other columns are left untouched.
    """
    column_map = {}
    column_map[code_col] = f"bfs_gmde_nummer_{origin}"
    column_map[name_col] = f"bfs_gmde_name_{origin}"
    return df.rename(columns=column_map)

In [3]:
# Load the data to map, picking the pandas reader from the file extension
input_file = SOURCE_PATH / FOLDER / FILE_NAME
# Lower-case the extension so e.g. 'RMA.XLSX' is handled like 'RMA.xlsx'
suffix = input_file.suffix.lower().lstrip('.')
if suffix == 'csv':
    df = pd.read_csv(input_file)
elif suffix == 'xlsx':
    df = pd.read_excel(input_file)
else:
    # Fail loudly: merely printing here would let the next line hit an
    # undefined `df` (fresh kernel) or silently show a stale one (re-run).
    raise ValueError(f'file .{suffix} format not supported!')
df.head()

Unnamed: 0,Kantons-Nr.,Kanton,bfs_gmde_nummer_2016,bfs_gmde_name_2016,Stimmberechtigte,Abgegebene Stimmen,Stimmbeteiligung,Leere,Ungültige,Gültige,Ja-Stimmen,Nein-Stimmen,Ja in %
0,1,ZH,1,Aeugst am Albis,1395,718,51.469534,16,0,702,457,245,65.099715
1,1,ZH,2,Affoltern am Albis,7057,3211,45.500921,104,0,3107,1864,1243,59.993563
2,1,ZH,3,Bonstetten,3561,1956,54.928391,70,0,1886,1254,632,66.489926
3,1,ZH,4,Hausen am Albis,2429,1262,51.955537,56,0,1206,714,492,59.20398
4,1,ZH,5,Hedingen,2497,1373,54.985983,48,0,1325,871,454,65.735849


In [4]:
# Try to automatically determine the Gemeindestand of the data from its
# municipality-number column 'bfs_gmde_nummer_2016'.
# It can happen that no Gemeindestand can be found or inferred; in that case
# you have to assign `origin` yourself instead of relying on this call.
# NOTE(review): the result appears to be a 'DD-MM-YYYY' date string (a previous
# run logged "Inferred Gemeindestand: 10-04-2016") — confirm against GemeindeMapper.
origin = gm.find_gemeindestand(df, 'bfs_gmde_nummer_2016')

Found territory shared by several municipalities of Switzerland (Kommunanz)!
                  Temporarily removing [np.int64(5238)] for Gemeindestands search...
Found lake (kant. Seeareal) or foreign territory!
                  Temporarily removing [np.int64(9030), np.int64(9040), np.int64(9100), np.int64(9120), np.int64(9160), np.int64(9170), np.int64(9190), np.int64(9200), np.int64(9220), np.int64(9230), np.int64(9250)] for Gemeindestands search...
Inferred Gemeindestand: 10-04-2016


In [5]:
# Define the target gemeindestände to create the mapping
targets = ['01-01-2016', '01-01-2017', '01-01-2018', '01-01-2019', '01-01-2020', '01-01-2021', '01-01-2022', '01-01-2023', '01-01-2024']

mapping = await gm.create_multi_mapping(origin, targets)
mapping

Creating the mapping. This might take some time...


In [22]:
# Rename the original code/name columns so they carry the detected origin
df = rename_original_columns(df, 'bfs_gmde_nummer_2016', 'bfs_gmde_name_2016', origin)

# Merge the mapping onto the data via the origin municipality number.
# A left join keeps every input row, including those without a mapping entry.
code_col = f'bfs_gmde_nummer_{origin}'
name_col = f'bfs_gmde_name_{origin}'
df_merged = df.merge(mapping, on=[code_col], how='left')

# The name column exists on both sides, so pandas suffixes it with _x (data)
# and _y (mapping); keep only the mapping's version under the plain name.
df_merged = df_merged.drop(columns=[f'{name_col}_x']).rename(columns={f'{name_col}_y': name_col})

# Path.stem avoids reusing the enclosing quote inside the f-string (a
# SyntaxError before Python 3.12) and keeps names with extra dots intact.
output_file = TARGET_PATH / FOLDER / f'{Path(FILE_NAME).stem}_mapped.xlsx'
df_merged.to_excel(output_file, index=False)