In [95]:
# load packages
import requests
import re
from bs4 import BeautifulSoup
import datetime
import pickle

# Get data on all members of the Bundestag

In [96]:
# get data from bundestag API
# a timeout keeps the cell from hanging forever if the server does not answer,
# and raise_for_status() stops early on an HTTP error instead of trying to
# parse an error page as JSON
response = requests.get("https://www.bundestag.de/static/appdata/sitzplan/data.json", timeout=30)
response.raise_for_status()

# convert dictionary to list, leaving out the entry stored under the key "-1"
# NOTE(review): "-1" is presumably a placeholder and not a real member - confirm
members = [record for key, record in response.json().items() if key != "-1"]

# Get voting data for all members

In [97]:
# define base url
base_url = "https://www.bundestag.de"

# number of votes returned per request by the vote app
# NOTE(review): assumed from the offset steps of 10 used for paging - confirm
PAGE_SIZE = 10

# votes dated on or before this day are not counted as part of the current
# legislative period and are dropped
PERIOD_CUTOFF = datetime.date(2021, 9, 26)

# seconds to wait for the server before giving up on a request
REQUEST_TIMEOUT = 30

# create list to store ids of members with corrupt data
exceptions = []

# get list length to report status
list_length = len(members)


def extract_votes(parsed_html):
    """Return the votes listed in the table of one parsed result page.

    Parameters
    ----------
    parsed_html : BeautifulSoup
        Parsed response of the vote app (first page or a follow-up page).

    Returns
    -------
    list of dict
        One dict per non-empty table row with the keys ``"date"``
        (``datetime.date``), ``"topic"`` (str) and ``"vote"`` (str, trailing
        whitespace removed). Empty if the page has no table body.

    Raises
    ------
    ValueError
        If a date cell does not match the format ``%d.%m.%Y``.
    """
    table = parsed_html.find("table")
    body = table.find("tbody") if table is not None else None

    # a page without a table body simply contributes no votes
    if body is None:
        return []

    page_votes = []
    for row in body.find_all("tr"):

        # skip empty rows
        if row.text == "":
            continue

        # get tds for vote; rows that do not have all three columns are skipped
        tds = row.find_all("td")
        if len(tds) < 3:
            continue

        # every record uses the same schema regardless of the page it is from
        page_votes.append({"date": datetime.datetime.strptime(tds[0].text.replace("\n", ""), '%d.%m.%Y').date(),
                           "topic": tds[1].text.replace("\n", ""),
                           "vote": tds[2].text.replace("\n", "").rstrip()})

    return page_votes


# iterate through members; the counter starts at 1 so the first member is
# reported as 1/N
for member_count, member in enumerate(members, start=1):

    # report member to be scraped
    print(f"Get votes for {member['name']} ({member_count}/{list_length})")

    # get href for member and merge to url to call member data
    url = base_url + member["href"] + "?subview=na"

    # get page
    r = requests.get(url, timeout=REQUEST_TIMEOUT)

    # get soup
    # NOTE(review): no parser is named, so BeautifulSoup picks whichever one is
    # installed; pin one once it is confirmed which parser the page needs
    soup = BeautifulSoup(r.content)

    # get voter_id from the first row whose data-loadable attribute carries one
    voter_id = None
    for row in soup.find_all("div", attrs = {"class": "row"}):
        if row.has_attr("data-loadable"):
            id_match = re.search(r"vaid=(\d+)", row["data-loadable"])
            if id_match is not None:
                voter_id = id_match.group(1)
                break

    # without a voter id no votes can be requested: treat as corrupt data
    if voter_id is None:
        exceptions.append(member["id"])
        continue

    # request first set of voting data
    html_content = requests.get(f"{base_url}/apps/na/na/abstimmungenForMdb.form?ajax=true&vaid={voter_id}", timeout=REQUEST_TIMEOUT).content.decode("utf-8")

    # parse to html
    parsed_html = BeautifulSoup(html_content)

    try:

        # get the number behind "von" in the pagination text
        # NOTE(review): assumed to be the total number of votes (not the number
        # of pages) - confirm against the live page
        total_votes = int(re.search(r"von\s(\d+)", parsed_html.find("p", attrs = {"class": "bt-paginierung__text"}).find("span").text).group(1))
    except (AttributeError, ValueError):

        # pagination info is missing or unreadable: add member to exception
        # list and go on with the next member
        exceptions.append(member["id"])
        continue

    # create empty list to store votes
    votes = []

    # becomes True as soon as a vote from before the cutoff shows up
    # NOTE(review): stopping there assumes the app lists votes newest first
    reached_cutoff = False

    # offset of the page currently held in parsed_html
    offset = 0

    while True:

        # keep only votes cast during this legislative period
        for vote in extract_votes(parsed_html):
            if vote["date"] > PERIOD_CUTOFF:
                votes.append(vote)
            else:
                reached_cutoff = True

        # move on to the next page, unless older votes were reached or the
        # next offset would lie behind the last vote
        offset += PAGE_SIZE
        if reached_cutoff or offset >= total_votes:
            break

        # get next set of votes
        html_content = requests.get(f"{base_url}/apps/na/na/abstimmungenForMdbResults.form?vaid={voter_id}&offset={offset}", timeout=REQUEST_TIMEOUT).content.decode("utf-8")

        # parse to html
        parsed_html = BeautifulSoup(html_content)

    # add votes to member (also when everything fit on the first page)
    member["votes"] = votes

# remove members from exception list
members = [member for member in members if member["id"] not in exceptions]

Get votes for Dr. Alexander Gauland
Get votes for Matthias Seestern-Pauly
Get votes for Michael Grosse-Brömer
Get votes for Marlene Schönberger
Get votes for Rasha Nasr
Get votes for Albrecht Glaser
Get votes for Prof. Dr. Stephan Seiter
Get votes for Markus Grübel
Get votes for Christina-Johanne Schröder
Get votes for Brian Nickholz
Get votes for Hannes Gnauck
Get votes for Rainer Semet
Get votes for Prof. Monika Grütters
Get votes for Kordula Schulz-Asche
Get votes for Dietmar Nietan
Get votes for Kay Gottschalk
Get votes for Judith Skudelny
Get votes for Manfred Grund
Get votes for Melis Sekmen
Get votes for Jörg Nürnberger
Get votes for Mariana Iris Harder-Kühnel
Get votes for Bettina Stark-Watzinger
Get votes for Oliver Grundmann
Get votes for Nyke Slawik
Get votes for Lennard Oehl
Get votes for Jochen Haug
Get votes for Konrad Stockmeier
Get votes for Serap Güler
Get votes for Dr. Anne Monika Spallek
Get votes for Mahmut Özdemir
Get votes for Martin Hess
Get votes for Dr. Marie-A

In [99]:
# write the scraped members to disk as a pickle, but only after the user has
# typed exactly "Yes" at the prompt; any other answer leaves the file untouched
confirmation = input("Are you sure, you want to overwrite the data file? (Yes|No)")

if confirmation == "Yes":
    with open(file="voting_data.pkl", mode="wb") as pickle_file:
        pickle.dump(members, pickle_file)