In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET

In [2]:
# Input data: overall result file of the 2021 Bundestag election. Download it next to
# this notebook before running:
#   wget https://www.bundeswahlleiter.de/bundestagswahlen/2021/ergebnisse/opendata/daten/gesamtergebnis_01.xml
path = "gesamtergebnis_01.xml"
# Build the element tree directly from the file; `root` is its top-level element.
tree = ET.ElementTree(file=path)
root = tree.getroot()

In [3]:
# Collect the list votes (Stimmart='LISTE') from the federal-level result
# (Gebietsart='BUND'):
#   * num_all_votes - number of valid list votes overall
#   * votes         - list votes per party, indexed by party name
num_all_votes = 0
votes = {}
for gruppenergebnis_node in tree.findall("./Gebietsergebnis[@Gebietsart='BUND']/Gruppenergebnis"):
    if gruppenergebnis_node.attrib["Gruppenart"] == "ALLGEMEIN" and gruppenergebnis_node.attrib["Name"] == "Gültige":
        for stimmergebnis_node in gruppenergebnis_node.findall("./Stimmergebnis[@Stimmart='LISTE']"):
            num_all_votes = int(stimmergebnis_node.attrib["Anzahl"])
    elif gruppenergebnis_node.attrib["Gruppenart"] == "PARTEI":
        party = gruppenergebnis_node.attrib["Name"]
        # Reset for every party: without this, a party that has no LISTE entry
        # would silently inherit the vote count of the previously processed party
        # (or raise NameError if it happened to be the first one).
        num_party_votes = None
        for stimmergebnis_node in gruppenergebnis_node.findall("./Stimmergebnis[@Stimmart='LISTE']"):
            anzahl = stimmergebnis_node.attrib["Anzahl"]
            # The file uses the literal "n/a" where no count is available.
            num_party_votes = None if anzahl == "n/a" else int(anzahl)
        if num_party_votes is None:
            # No usable list-vote count for this party: leave it out.
            continue
        votes[party] = num_party_votes
votes = pd.Series(votes)
print(votes)

CDU                      8770980
SPD                     11949756
AfD                      4802097
FDP                      5316698
DIE LINKE                2269993
GRÜNE                    6848215
CSU                      2402826
FREIE WÄHLER             1127171
Die PARTEI                461487
Tierschutzpartei          674789
NPD                        64608
PIRATEN                   169889
ÖDP                       112351
V-Partei³                  31966
DiB                         7291
BP                         32901
Tierschutzallianz          13686
MLPD                       17994
Gesundheitsforschung       49331
MENSCHLICHE WELT            3794
DKP                        15158
Die Grauen                 19382
BüSo                         737
Die Humanisten             47838
Gartenpartei                7611
du.                        17861
SGP                         1535
dieBasis                  628432
Bündnis C                  40126
BÜRGERBEWEGUNG              7485
III. Weg  

In [4]:
# Parties whose votes take part in the "mit 5%-Hürde" seat distribution below.
# NOTE(review): the list is hard-coded; presumably these are the parties that
# entered parliament (incl. exemptions from the 5% hurdle) - confirm.
parties_in_distribution = ["CDU", "SPD", "AfD", "FDP", "DIE LINKE", "GRÜNE", "CSU", "SSW"]

# Keep the votes of the selected parties, then restore the full party index
# with 0 votes for everybody else.
selected_votes = votes.loc[parties_in_distribution]
votes_process = selected_votes.reindex(votes.index, fill_value=0)

# Sanity check: votes that count towards the distribution vs. all valid list votes.
(votes_process.sum(), num_all_votes)

(42415895, 46419448)

In [5]:
def calc_seats(votes, target_seats, step_size=10):
    """Distribute ``target_seats`` seats proportionally to ``votes`` (divisor method).

    Every party receives ``round(votes / divisor)`` seats. The divisor starts at
    ``votes.sum() / target_seats`` and is moved down (too few seats) or up (too
    many seats) until the rounded seats add up to exactly ``target_seats``.
    Whenever the search direction flips, the step size is halved.

    Parameters
    ----------
    votes : pandas.Series (or numpy array) of vote counts
        One entry per party.
    target_seats : int
        Total number of seats to hand out.
    step_size : float, default 10
        Initial amount by which the divisor is changed per iteration. When the
        divisor has to be lowered, the step is capped at half the current
        divisor so the divisor always stays positive.

    Returns
    -------
    Same container type as ``votes``, holding the integer seat count per party.

    Raises
    ------
    ValueError
        If the divisor is not positive (e.g. all vote counts are zero).
    RuntimeError
        If no divisor yielding exactly ``target_seats`` is found within 1000
        iterations, e.g. because of an exact tie between parties.

    Notes
    -----
    Quotients ending in exactly .5 are rounded by ``.round()`` (round half to
    even in numpy/pandas); no tie-breaking by lot is performed.
    """
    sum_votes = votes.sum()
    divisor = sum_votes / target_seats
    direction = None  # direction of the previous divisor change: "up" / "down"
    for _ in range(1000):
        if divisor <= 0:
            raise ValueError(f"invalid divisor {divisor}")
        party_seats = (votes / divisor).round().astype(int)
        sum_seats = party_seats.sum()
        if sum_seats == target_seats:
            return party_seats
        if sum_seats < target_seats:
            # Too few seats -> divisor must shrink.
            if direction == "up":
                step_size /= 2
            # Never step to zero or below: with small vote totals the default
            # step can exceed the divisor itself, which used to abort the search
            # with a ValueError although a valid divisor exists.
            step_size = min(step_size, divisor / 2)
            divisor -= step_size
            direction = "down"
        else:
            # Too many seats -> divisor must grow.
            if direction == "down":
                step_size /= 2
            divisor += step_size
            direction = "up"
    # Returning the last allocation here would silently hand out the wrong
    # number of seats, so fail loudly instead.
    raise RuntimeError(
        f"no divisor found that yields exactly {target_seats} seats "
        f"(last attempt gave {sum_seats})"
    )

In [6]:
# Target sizes of the two simulated distributions (before the extra CSU seats).
num_intended_seats_without_hurdle = 800
num_intended_seats = 732

# Three seats are added for the CSU on top of each computed distribution.
# NOTE(review): presumably these are the CSU's uncompensated overhang seats - confirm.
csu_extra_seats = pd.Series({"CSU": 3}).reindex(votes.index, fill_value=0)
seats_without_hurdle = calc_seats(votes, num_intended_seats_without_hurdle) + csu_extra_seats
seats_with_hurdle = calc_seats(votes_process, num_intended_seats) + csu_extra_seats

# Column groups of the result table (two-level column header).
group_all = ""
group_without = "ohne 5%-Hürde"
group_with = "mit 5%-Hürde"

table_columns = {
    (group_all, "Stimmen"): votes,
    (group_all, "Anteil"): votes / num_all_votes,
    (group_without, "Sitze ideal"): num_intended_seats_without_hurdle * votes / num_all_votes,
    (group_without, "Sitze"): seats_without_hurdle,
    (group_without, "Anteil Sitze"): seats_without_hurdle / (num_intended_seats_without_hurdle + 3),
    (group_with, "Anteil"): votes_process / votes_process.sum(),
    (group_with, "Sitze ideal"): num_intended_seats * votes_process / votes_process.sum(),
    (group_with, "Sitze"): seats_with_hurdle,
    (group_with, "Anteil Sitze"): seats_with_hurdle / (num_intended_seats + 3),
}
# Largest vote share first.
df = pd.DataFrame(table_columns).sort_values((group_all, "Anteil"), ascending=False)

# All share columns are rendered as percentages with four decimals.
percent_columns = [
    (group_all, "Anteil"),
    (group_without, "Anteil Sitze"),
    (group_with, "Anteil"),
    (group_with, "Anteil Sitze"),
]
seat_columns = [(group_without, "Sitze"), (group_with, "Sitze")]

# Shade every seat cell of a party that gets at least one seat.
df.style.applymap(
    lambda x: "background: rgba(0, 127, 0, 0.2)" if x >= 1 else "",
    subset=seat_columns,
).format(
    formatter=dict.fromkeys(percent_columns, "{:.4%}"),
).set_table_styles([
    {"selector": ".col_heading", "props": "text-align: left"},
])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ohne 5%-Hürde,ohne 5%-Hürde,ohne 5%-Hürde,mit 5%-Hürde,mit 5%-Hürde,mit 5%-Hürde,mit 5%-Hürde
Unnamed: 0_level_1,Stimmen,Anteil,Sitze ideal,Sitze,Anteil Sitze,Anteil,Sitze ideal,Sitze,Anteil Sitze
SPD,11949756,25.7430%,205.943957,206,25.6538%,28.1728%,206.225081,206,28.0272%
CDU,8770980,18.8951%,151.160436,151,18.8045%,20.6785%,151.366778,151,20.5442%
GRÜNE,6848215,14.7529%,118.023204,118,14.6949%,16.1454%,118.184312,118,16.0544%
FDP,5316698,11.4536%,91.628802,92,11.4570%,12.5347%,91.75388,92,12.5170%
AfD,4802097,10.3450%,82.760088,83,10.3362%,11.3215%,82.87306,83,11.2925%
CSU,2402826,5.1763%,41.410678,45,5.6040%,5.6649%,41.467205,45,6.1224%
DIE LINKE,2269993,4.8902%,39.121413,39,4.8568%,5.3518%,39.174816,39,5.3061%
FREIE WÄHLER,1127171,2.4282%,19.425841,19,2.3661%,0.0000%,0.0,0,0.0000%
Tierschutzpartei,674789,1.4537%,11.629419,12,1.4944%,0.0000%,0.0,0,0.0000%
dieBasis,628432,1.3538%,10.830495,11,1.3699%,0.0000%,0.0,0,0.0000%
