In [1]:
from pathlib import Path
import pandas as pd
from municipality_mapping import MunicipalityCodeMapper, MunicipalityNameMatcher, CONFIG

In [2]:
# Instantiate the two helpers used throughout this notebook.
# NOTE(review): the recorded output shows an "Updating Gemeindestände" pass of 87 steps
# (~14 s) printed twice - presumably once per constructor; confirm before re-running often.
code_mapper = MunicipalityCodeMapper()
# The second argument looks like the earliest Gemeindestand to consider (the log reads
# "since 01-01-1981") - TODO confirm against MunicipalityNameMatcher.
name_matcher = MunicipalityNameMatcher(CONFIG, "01-01-1981")

Found 87 Gemeindestände since 01-01-1981! Latest: 01-01-2024


Updating Gemeindestände: 100%|██████████| 87/87 [00:13<00:00,  6.49it/s]


Found 87 Gemeindestände since 01-01-1981! Latest: 01-01-2024


Updating Gemeindestände: 100%|██████████| 87/87 [00:14<00:00,  5.92it/s]


# NAME MATCHING

In [9]:
# Sample queries for the name matcher. Going by the results shown further down, they
# cover: unique names, a name with canton suffix, an ambiguous name, a foreign place,
# a spelling variant, a typo, an unknown string and an empty string.
sample_names = [
    'Zürich',
    'Buchs (AG)',
    'Buchs',
    'Berlin (DE)',
    'St.Gallen',
    'Appenzel',
    'Ballkesymme',
    '',
]
test_df = pd.DataFrame({'municipality_name': sample_names})

In [10]:
# Match every entry of the 'municipality_name' column against the known municipality names.
# NOTE(review): the log reports the exact matches first and then a second pass over the
# remaining names - presumably a fuzzy match (see the `confidence` column); confirm in
# MunicipalityNameMatcher.match_dataframe.
result_df = name_matcher.match_dataframe(test_df, query_column='municipality_name')

Found 4 exact matches!


Matching 4 names: 100%|██████████| 4/4 [00:00<00:00, 16.84it/s]


In [11]:
# Sentinel values seen in `bfs_gmde_code_origin` for the sample input:
#   -2 -> several candidates (listed in `candidate_codes`), i.e. an ambiguous name
#   -1 / 0 -> no BFS code assigned - TODO confirm the exact difference between -1 and 0
#             against MunicipalityNameMatcher.
result_df

Unnamed: 0,municipality_name,normalized,bfs_gmde_stand_origin,bfs_gmde_code_origin,candidate_codes,confidence
0,Zürich,zuerich,01-01-2024,261,,1.0
1,Buchs (AG),buchs (ag),01-01-2024,4003,,1.0
2,Buchs,buchs,"01-01-2024, 01-01-2024, 01-01-2024",-2,"4003, 3271, 83",1.0
3,St.Gallen,st gallen,01-01-2024,3203,,1.0
4,Berlin (DE),berlin (de),,-1,,1.0
5,Ballkesymme,ballkesymme,,0,,0.0
6,,,,0,,0.0
7,Appenzel,appenzel,01-01-2024,3101,,0.871004


# CODE MAPPING

In [None]:
# Ambiguous names have to be resolved before!
no_ambiguous = result_df[result_df['bfs_gmde_code_origin'] != -2].copy()
await code_mapper.map_multiple_gemeindestaende_to_latest(no_ambiguous, code_column='bfs_gmde_code_origin', stand_column='bfs_gmde_stand_origin')

Unnamed: 0,municipality_name,normalized,bfs_gmde_stand_origin,bfs_gmde_code_origin,candidate_codes,confidence,bfs_gmde_code_01-01-2024
0,Zürich,zuerich,01-01-2024,261,,1.0,261
1,Buchs (AG),buchs (ag),01-01-2024,4003,,1.0,4003
3,St.Gallen,st gallen,01-01-2024,3203,,1.0,3203
4,Berlin (DE),berlin (de),,-1,,1.0,0
5,Ballkesymme,ballkesymme,,0,,0.0,0
6,,,,0,,0.0,0
7,Appenzel,appenzel,01-01-2024,3101,,0.871004,3101


In [None]:
# Map all municipality codes from the state at 22-09-1985 to the 01-01-2024 state,
# including the municipality names (return_names=True adds the *_name_* columns).
# The start date is not an official Gemeindestand; per the logged message the mapper
# substitutes the closest one (01-01-1984).
await code_mapper.create_multi_mapping('22-09-1985', ['01-01-2024'], return_names=True)

Creating the mapping. This might take some time...
Date 22-09-1985 does not correspond to an official Gemeindestand. Using the closest: 01-01-1984.


Unnamed: 0,bfs_gmde_code_01-01-1984,bfs_gmde_name_01-01-1984,bfs_gmde_code_01-01-2024,bfs_gmde_name_01-01-2024
0,3501,Alvaschein,3542,Albula/Alvra
1,3403,Ganterschwil,3395,Bütschwil-Ganterschwil
2,3523,Wiesen (GR),3851,Davos
3,3522,Filisur,3544,Bergün Filisur
4,3521,Bergün/Bravuogn,3544,Bergün Filisur
...,...,...,...,...
3023,4561,Felben-Wellhausen,4561,Felben-Wellhausen
3024,4637,Siegershausen,4666,Kemmental
3025,6716,Mettembert,6716,Mettembert
3026,4074,Oberwil-Lieli,4074,Oberwil-Lieli


In [None]:
# Targets passed as a list: the output has one code column per listed date, and
# non-official dates are replaced by the closest Gemeindestand (see the log messages).
await code_mapper.create_multi_mapping('01-01-2020', ['01-01-2021', '20-12-2022', '20-11-2024'], return_names=False)

Creating the mapping. This might take some time...
Date 20-12-2022 does not correspond to an official Gemeindestand. Using the closest: 01-01-2023.
Date 20-11-2024 does not correspond to an official Gemeindestand. Using the closest: 01-01-2024.


Unnamed: 0,bfs_gmde_code_01-01-2020,bfs_gmde_code_01-01-2021,bfs_gmde_code_01-01-2023,bfs_gmde_code_01-01-2024
0,4122,4122,4122,4122
1,6158,6158,6158,6158
2,3023,3023,3023,3023
3,6011,6011,6011,6011
4,2228,2228,2228,2228
...,...,...,...,...
2197,1123,1123,1123,1123
2198,2117,2117,2117,2117
2199,2237,2237,2237,2237
2200,3901,3901,3901,3901


In [None]:
# Targets passed as a tuple: judging by the output columns this is treated as a date
# range, yielding every Gemeindestand from 01-01-2021 up to the one closest to
# 20-11-2024 - TODO confirm in MunicipalityCodeMapper.create_multi_mapping.
await code_mapper.create_multi_mapping('01-01-2020', ('01-01-2021', '20-11-2024'), return_names=False)

Creating the mapping. This might take some time...
No larger date found. Falling back to nearest.
Date 20-11-2024 does not correspond to an official Gemeindestand. Using the closest: 01-01-2024.


Unnamed: 0,bfs_gmde_code_01-01-2020,bfs_gmde_code_01-01-2021,bfs_gmde_code_18-04-2021,bfs_gmde_code_01-07-2021,bfs_gmde_code_01-01-2022,bfs_gmde_code_10-04-2022,bfs_gmde_code_01-05-2022,bfs_gmde_code_01-01-2023,bfs_gmde_code_01-01-2024
0,4122,4122,4122,4122,4122,4122,4122,4122,4122
1,6158,6158,6158,6158,6158,6158,6158,6158,6158
2,3023,3023,3023,3023,3023,3023,3023,3023,3023
3,6011,6011,6011,6011,6011,6011,6011,6011,6011
4,2228,2228,2228,2228,2228,2228,2228,2228,2228
...,...,...,...,...,...,...,...,...,...
2197,1123,1123,1123,1123,1123,1123,1123,1123,1123
2198,2117,2117,2117,2117,2117,2117,2117,2117,2117
2199,2237,2237,2237,2237,2237,2237,2237,2237,2237
2200,3901,3901,3901,3901,3901,3901,3901,3901,3901


In [None]:
# Build the detailed 01-01-2020 -> 01-01-2024 mapping. With export_path set, the log
# shows the mapping is additionally saved as an .xlsx file inside that directory.
await code_mapper.create_mapping('01-01-2020', '01-01-2024', export_path='./mappings')

Save mapping to: ./mappings/mapping_01-01-2020_01-01-2024.xlsx


Unnamed: 0,InitialHistoricalCode,InitialCode,InitialName,InitialParentHistoricalCode,InitialParentName,InitialStep,TerminalHistoricalCode,TerminalCode,TerminalName,TerminalParentHistoricalCode,TerminalParentName,TerminalStep
0,10009,4122,Villnachern,10023,Bezirk Brugg,,10009,4122,Villnachern,10023,Bezirk Brugg,
1,10078,6158,Vionnaz,10013,District de Monthey,,10078,6158,Vionnaz,10013,District de Monthey,
2,10157,3023,Speicher,10098,Bezirk Mittelland,,10157,3023,Speicher,10098,Bezirk Mittelland,
3,10159,6011,Zwischbergen,10035,Bezirk Brig,,10159,6011,Zwischbergen,10035,Bezirk Brig,
4,10162,2228,Villars-sur-Glâne,10104,District de la Sarine,,10162,2228,Villars-sur-Glâne,10104,District de la Sarine,
...,...,...,...,...,...,...,...,...,...,...,...,...
2197,16131,1123,Altishofen,10302,Wahlkreis Willisau,,16131,1123,Altishofen,10302,Wahlkreis Willisau,
2198,16132,2117,Villaz,10106,District de la Glâne,,16132,2117,Villaz,10106,District de la Glâne,
2199,16133,2237,Prez,10104,District de la Sarine,,16133,2237,Prez,10104,District de la Sarine,
2200,16134,3901,Chur,10314,Region Plessur,26.0,16599,3901,Chur,10314,Region Plessur,26.0


In [11]:
# Root directories for the raw inputs and the mapped results (relative to this notebook)
SOURCE_PATH = Path('../data/raw')
TARGET_PATH = Path('../data/results')

# Dataset to map: sub-folder below the root directories and the file inside it
FOLDER = 'other_fem_votes'
FILE_NAME = '336NeuesEheundErbrecht.xlsx'

# Ensure the result folder for this dataset exists (no error if it is already there)
TARGET_PATH.joinpath(FOLDER).mkdir(parents=True, exist_ok=True)
def rename_original_columns(df, code_col, name_col, origin):
    """Rename the code and name columns so they carry the origin Gemeindestand.

    Args:
        df: DataFrame holding the original municipality columns.
        code_col: Current name of the municipality code column.
        name_col: Current name of the municipality name column.
        origin: Label appended to the new column names.

    Returns:
        A new DataFrame in which ``code_col`` is called
        ``bfs_gmde_nummer_<origin>`` and ``name_col`` is called
        ``bfs_gmde_name_<origin>``; ``df`` itself is left unchanged.
    """
    column_map = {}
    column_map[code_col] = f"bfs_gmde_nummer_{origin}"
    column_map[name_col] = f"bfs_gmde_name_{origin}"
    return df.rename(columns=column_map)
# Load the data to map
# The reader is chosen from the file extension (compared case-insensitively, so
# '.CSV' / '.XLSX' work as well).
source_file = SOURCE_PATH / FOLDER / FILE_NAME
suffix = Path(FILE_NAME).suffix.lstrip('.').lower()
if suffix == 'csv':
    # The CSV inputs are read as ISO-8859-1 (Latin-1) rather than pandas' UTF-8 default.
    df = pd.read_csv(source_file, encoding='ISO-8859-1')
elif suffix == 'xlsx':
    df = pd.read_excel(source_file)
else:
    # Fail loudly: merely printing would leave `df` undefined (NameError below) or,
    # worse, silently keep a stale `df` from a previous run of this cell.
    raise ValueError(f'file .{suffix} format not supported!')
df.head()

Unnamed: 0,code,gemeinden,Stimmberechtigte,Abgegebene Stimmen,Gültige Stimmen,JA
0,1,Aeugst am Albis,679,366,362,188
1,2,Affoltern am Albis,5083,2130,2112,1168
2,3,Bonstetten,1701,911,899,511
3,4,Hausen am Albis,1583,801,793,439
4,5,Hedingen,1325,626,622,352


In [12]:
await code_mapper.map_dataframe(df, code_column='code', name_column='gemeinden', target=['01-01-2016', '01-01-2017', '01-01-2018', '01-01-2019', '01-01-2020', '01-01-2021', '01-01-2022', '01-01-2023', '01-01-2024'])

Found lakes or foreign territories: {np.int64(9120), np.int64(9161), np.int64(9212), np.int64(9252)}. Temporarily removing them for Gemeindestands search...
Non-BFS codes detected!Removing {4485, 4870, 4615, 4745, 4490, 4625, 4755, 4505, 4890, 4640, 4770, 4515, 4900, 4780, 4525, 4910, 4405, 4920, 4540, 4670, 4935, 4940, 4685, 4815, 4560, 4695, 4830, 4960, 4705, 4580, 4845, 4720, 4725}
Inferred Gemeindestand: 01-01-1984
Creating the mapping. This might take some time...
Mapped DataFrame from 01-01-1984 to ['01-01-2016', '01-01-2017', '01-01-2018', '01-01-2019', '01-01-2020', '01-01-2021', '01-01-2022', '01-01-2023', '01-01-2024'] Gemeindestand.


Unnamed: 0,bfs_gmde_code_01-01-1984,gemeinden,Stimmberechtigte,Abgegebene Stimmen,Gültige Stimmen,JA,bfs_gmde_code_01-01-2016,bfs_gmde_code_01-01-2017,bfs_gmde_code_01-01-2018,bfs_gmde_code_01-01-2019,bfs_gmde_code_01-01-2020,bfs_gmde_code_01-01-2021,bfs_gmde_code_01-01-2022,bfs_gmde_code_01-01-2023,bfs_gmde_code_01-01-2024
0,1,Aeugst am Albis,679,366,362,188,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2,Affoltern am Albis,5083,2130,2112,1168,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
2,3,Bonstetten,1701,911,899,511,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
3,4,Hausen am Albis,1583,801,793,439,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
4,5,Hedingen,1325,626,622,352,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2906,6806,Vendlincourt,368,119,116,67,6806.0,6806.0,6806.0,6806.0,6806.0,6806.0,6806.0,6806.0,6806.0
2907,9120,BS-Auslandschweizer,224,0,0,0,,,,,,,,,
2908,9161,AI-Korrespondenzweg,0,70,70,50,,,,,,,,,
2909,9212,TI-Korrespondenzweg,0,390,378,299,,,,,,,,,
