In [1]:
import pandas as pd

In [16]:
# Load the raw results file. The first physical line is skipped, so the second
# line of the file supplies the column labels.
df_parl = pd.read_csv(
    "data/wahl70_sprengel.csv", delimiter=";", encoding="ISO-8859-1", skiprows=1
)

# Keep 'Sprengel_Nr' plus every column whose label contains 'Prozente' or
# 'Name/Wählergruppe'.
columns_of_interest = ["Sprengel_Nr"] + [
    col for col in df_parl.columns if "Prozente" in col or "Name/Wählergruppe" in col
]
simplified_df = df_parl[columns_of_interest].copy()

# Highest numeric suffix found on any 'Wählergruppe' column. Falls back to 0 when
# no such column exists, so the loop below is skipped instead of max() raising
# ValueError on an empty sequence.
max_wahlergruppe_index = max(
    (
        int(col.split("Wählergruppe")[-1])
        for col in df_parl.columns
        if "Wählergruppe" in col and any(char.isdigit() for char in col)
    ),
    default=0,
)

# Work out the new label for every 'Prozente/Wählergruppe<i>' column and remember
# the matching 'Name/Wählergruppe<i>' column, which is no longer needed afterwards.
rename_map = {}
name_columns_to_drop = []
for i in range(1, max_wahlergruppe_index + 1):
    percent_col = f"Prozente/Wählergruppe{i}"
    name_col = f"Name/Wählergruppe{i}"
    if percent_col in simplified_df.columns and name_col in simplified_df.columns:
        # Assumes the first row carries the party name for the whole column.
        party_name = simplified_df[name_col].iloc[0]
        # Keep only the text after the last " - " separator (plain split, no regex).
        short_name = party_name.split(" - ")[-1]
        rename_map[percent_col] = short_name
        name_columns_to_drop.append(name_col)

# Apply all renames and drops in one step, without in-place mutation.
simplified_df = simplified_df.rename(columns=rename_map).drop(
    columns=name_columns_to_drop
)

# Convert every percentage column to float. Columns that read_csv already parsed
# as numbers (no decimal comma anywhere in the column) have no .str accessor, so
# only non-numeric columns go through the comma -> dot replacement.
for col in simplified_df.columns:
    if col == "Sprengel_Nr":
        continue
    values = simplified_df[col]
    if not pd.api.types.is_numeric_dtype(values):
        values = values.astype(str).str.replace(",", ".", regex=False)
    simplified_df[col] = values.astype(float)

# 'Gewinner' is the label of the column holding the largest value in each row.
# NOTE(review): every remaining non-'Sprengel_Nr' column takes part, including any
# 'Prozente' column that had no matching name column — confirm none exist.
simplified_df["Gewinner"] = simplified_df.drop(columns=["Sprengel_Nr"]).idxmax(axis=1)

# Turn 'Sprengel_Nr' into a zero-padded string of at least three characters.
# Values that cannot be parsed as numbers become 0 and are written as "000".
simplified_df["Sprengel_Nr"] = (
    pd.to_numeric(simplified_df["Sprengel_Nr"], errors="coerce")
    .fillna(0)
    .astype(int)
    .map("{:03}".format)
)

# Save the DataFrame to a CSV file with UTF-8 encoding
simplified_df.to_csv("data/ergebnis_partei.csv", index=False, encoding="UTF8")

In [5]:
import pandas as pd


# Helper: strip punctuation and academic titles, then flip the word order
def format_name(name):
    """Return ``name`` without academic titles and with its words reversed.

    Every "." and "," is removed, the result is split on whitespace, words that
    match a known title (case-insensitively) are discarded, and the remaining
    words are joined with single spaces in reverse order.
    """
    academic_titles = ("mag", "dr", "ing", "msc", "mba")
    without_punctuation = name.replace(".", "").replace(",", "")
    kept_words = []
    for word in without_punctuation.split():
        if word.lower() in academic_titles:
            continue
        kept_words.append(word)
    kept_words.reverse()
    return " ".join(kept_words)


# Load the raw results file. The first physical line is skipped, so the second
# line of the file supplies the column labels.
df_parl = pd.read_csv(
    "data/wahl70_sprengel_bgm.csv", delimiter=";", encoding="ISO-8859-1", skiprows=1
)

# Keep 'Sprengel_Nr' plus every column whose label contains 'Prozente' or
# 'Name/Kandidat'.
columns_of_interest = ["Sprengel_Nr"] + [
    col for col in df_parl.columns if "Prozente" in col or "Name/Kandidat" in col
]
simplified_df = df_parl[columns_of_interest].copy()

# Highest numeric suffix found on any 'Kandidat' column. Falls back to 0 when no
# such column exists, so the loop below is skipped instead of max() raising
# ValueError on an empty sequence.
max_kandidat_index = max(
    (
        int(col.split("Kandidat")[-1])
        for col in df_parl.columns
        if "Kandidat" in col and any(char.isdigit() for char in col)
    ),
    default=0,
)

# Work out the new label for every 'Prozente/Kandidat<i>' column and remember the
# matching 'Name/Kandidat<i>' column, which is no longer needed afterwards.
rename_map = {}
name_columns_to_drop = []
for i in range(1, max_kandidat_index + 1):
    percent_col = f"Prozente/Kandidat{i}"
    name_col = f"Name/Kandidat{i}"
    if percent_col in simplified_df.columns and name_col in simplified_df.columns:
        # Assumes the first row carries the candidate name for the whole column.
        candidate_name = df_parl[name_col].iloc[0]
        formatted_name = format_name(candidate_name)
        rename_map[percent_col] = formatted_name
        name_columns_to_drop.append(name_col)

# Apply all renames and drops in one step, without in-place mutation.
simplified_df = simplified_df.rename(columns=rename_map).drop(
    columns=name_columns_to_drop
)

# Convert every percentage column to float. Columns that read_csv already parsed
# as numbers (no decimal comma anywhere in the column) have no .str accessor, so
# only non-numeric columns go through the comma -> dot replacement.
for col in simplified_df.columns:
    if col == "Sprengel_Nr":
        continue
    values = simplified_df[col]
    if not pd.api.types.is_numeric_dtype(values):
        values = values.astype(str).str.replace(",", ".", regex=False)
    simplified_df[col] = values.astype(float)

# 'Gewinner' is the label of the column holding the largest value in each row.
simplified_df["Gewinner"] = simplified_df.drop(columns=["Sprengel_Nr"]).idxmax(axis=1)

# Turn 'Sprengel_Nr' into a zero-padded string of at least three characters.
# Values that cannot be parsed as numbers become 0 and are written as "000".
simplified_df["Sprengel_Nr"] = (
    pd.to_numeric(simplified_df["Sprengel_Nr"], errors="coerce")
    .fillna(0)
    .astype(int)
    .map("{:03}".format)
)
simplified_df.to_csv("data/ergebnis_bgm.csv", index=False, encoding="UTF-8")

In [2]:
import pandas as pd


# Helper: translate a raw candidate string into its short display name
def format_name(name):
    """Return the short candidate name for a known raw string.

    The lookup is an exact match on the full raw string. Any value that is not
    one of the known keys is returned unchanged.
    """
    known_candidates = {
        "Willi Georg (Georg Willi - Die Innsbrucker Grünen)": "Georg Willi",
        "Ing. Mag. Anzengruber Johannes (JA - Jetzt Innsbruck - Johannes Anzengruber)": "Johannes Anzengruber",
    }
    if name in known_candidates:
        return known_candidates[name]
    # Unknown input: hand it back untouched.
    return name


# Load the raw results file. The first physical line is skipped, so the second
# line of the file supplies the column labels.
df_parl = pd.read_csv(
    "data/wahl74_sprengel.csv", delimiter=";", encoding="ISO-8859-1", skiprows=1
)

# Keep 'Sprengel_Nr' plus every column whose label contains 'Prozente' or
# 'Name/Kandidat'.
columns_of_interest = ["Sprengel_Nr"] + [
    col for col in df_parl.columns if "Prozente" in col or "Name/Kandidat" in col
]
simplified_df = df_parl[columns_of_interest].copy()

# Highest numeric suffix found on any 'Kandidat' column. Falls back to 0 when no
# such column exists, so the loop below is skipped instead of max() raising
# ValueError on an empty sequence.
max_kandidat_index = max(
    (
        int(col.split("Kandidat")[-1])
        for col in df_parl.columns
        if "Kandidat" in col and any(char.isdigit() for char in col)
    ),
    default=0,
)

# Work out the new label for every 'Prozente/Kandidat<i>' column and remember the
# matching 'Name/Kandidat<i>' column, which is no longer needed afterwards.
rename_map = {}
name_columns_to_drop = []
for i in range(1, max_kandidat_index + 1):
    percent_col = f"Prozente/Kandidat{i}"
    name_col = f"Name/Kandidat{i}"
    if percent_col in simplified_df.columns and name_col in simplified_df.columns:
        # Assumes the first row carries the candidate name for the whole column.
        candidate_name = df_parl[name_col].iloc[0]
        formatted_name = format_name(candidate_name)
        rename_map[percent_col] = formatted_name
        name_columns_to_drop.append(name_col)

# Apply all renames and drops in one step, without in-place mutation.
simplified_df = simplified_df.rename(columns=rename_map).drop(
    columns=name_columns_to_drop
)

# Convert every percentage column to float. Columns that read_csv already parsed
# as numbers (no decimal comma anywhere in the column) have no .str accessor, so
# only non-numeric columns go through the comma -> dot replacement.
for col in simplified_df.columns:
    if col == "Sprengel_Nr":
        continue
    values = simplified_df[col]
    if not pd.api.types.is_numeric_dtype(values):
        values = values.astype(str).str.replace(",", ".", regex=False)
    simplified_df[col] = values.astype(float)

# 'Gewinner' is the label of the column holding the largest value in each row.
simplified_df["Gewinner"] = simplified_df.drop(columns=["Sprengel_Nr"]).idxmax(axis=1)

# Turn 'Sprengel_Nr' into a zero-padded string of at least three characters.
# Values that cannot be parsed as numbers become 0 and are written as "000".
simplified_df["Sprengel_Nr"] = (
    pd.to_numeric(simplified_df["Sprengel_Nr"], errors="coerce")
    .fillna(0)
    .astype(int)
    .map("{:03}".format)
)
simplified_df.to_csv("data/ergebnis_bgm_stichwahl.csv", index=False, encoding="UTF-8")