In [25]:
# load packages
import requests
import re
from bs4 import BeautifulSoup
import datetime
import pickle

# Get data on all members of the Bundestag

In [26]:
# Download the seating-plan JSON from the Bundestag website.
# A timeout keeps the notebook from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors before the JSON is decoded.
seating_plan_response = requests.get(
    "https://www.bundestag.de/static/appdata/sitzplan/data.json",
    timeout=30,
)
seating_plan_response.raise_for_status()

# The payload is a dictionary keyed by string ids.
seating_plan = seating_plan_response.json()

# Convert the dictionary to a list of its values, dropping the entry stored under key "-1".
members = [entry for key, entry in seating_plan.items() if key != "-1"]

# Get voting data for all members

In [27]:
# define base url
base_url = "https://www.bundestag.de"

# Step between consecutive offsets of the vote-list endpoint
# (the listing is requested in blocks of ten votes).
PAGE_SIZE = 10

# Votes dated on or before this day are treated as belonging to an earlier
# legislative period and are not stored.
PERIOD_CUTOFF = datetime.date(2021, 9, 26)

# Seconds to wait for the server before a request is aborted.
REQUEST_TIMEOUT = 30

# Name the parser explicitly so results do not depend on which optional
# parsers happen to be installed next to BeautifulSoup.
HTML_PARSER = "html.parser"


def fetch_soup(url, encoding=None):
    """Download `url` and return the response parsed as HTML.

    If `encoding` is given, the body is decoded with it before parsing;
    otherwise the raw bytes are handed to BeautifulSoup.
    Raises `requests.HTTPError` when the server answers with an error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    markup = response.content if encoding is None else response.content.decode(encoding)
    return BeautifulSoup(markup, HTML_PARSER)


def parse_votes(soup):
    """Return the votes listed in the first table of `soup`.

    Each vote is a dict with the keys "date" (a `datetime.date`), "topic" and
    "vote". An empty list is returned when the page contains no table body.
    """
    table = soup.find("table")
    body = table.find("tbody") if table is not None else None
    if body is None:
        return []

    records = []
    for row in body.find_all("tr"):
        # skip empty rows
        if row.text == "":
            continue

        cells = row.find_all("td")
        # skip rows that do not carry the three expected columns
        if len(cells) < 3:
            continue

        records.append({
            "date": datetime.datetime.strptime(cells[0].text.replace("\n", "").strip(), "%d.%m.%Y").date(),
            "topic": cells[1].text.replace("\n", ""),
            "vote": cells[2].text.replace("\n", "").rstrip(),
        })
    return records


def count_votes(soup):
    """Return the total number of votes announced in the pagination text ("... von N")."""
    pagination_text = soup.find("p", attrs={"class": "bt-paginierung__text"}).find("span").text
    return int(re.search(r"von\s(\d+)", pagination_text).group(1))


def scrape_votes(member):
    """Collect all votes `member` cast after `PERIOD_CUTOFF`.

    `member` must provide the key "href" (path of the member's page).
    Returns a list of dicts as produced by `parse_votes`.
    """
    # the member's own page contains the id used by the vote-list endpoint
    member_page = fetch_soup(base_url + member["href"] + "?subview=na")
    voter_id = [row["data-loadable"][-4:]
                for row in member_page.find_all("div", attrs={"class": "row"})
                if row.has_attr("data-loadable")][0]

    # the first block of votes also carries the pagination information
    page = fetch_soup(f"https://www.bundestag.de/apps/na/na/abstimmungenForMdb.form?ajax=true&vaid={voter_id}",
                      encoding="utf-8")
    total = count_votes(page)

    votes = []
    offset = 0
    while True:
        reached_earlier_period = False
        for record in parse_votes(page):
            if record["date"] > PERIOD_CUTOFF:
                votes.append(record)
            else:
                reached_earlier_period = True

        # stop once a block contained a vote from an earlier period
        # or when the next offset would lie beyond the last vote
        offset += PAGE_SIZE
        if reached_earlier_period or offset >= total:
            return votes

        page = fetch_soup(f"https://www.bundestag.de/apps/na/na/abstimmungenForMdbResults.form?vaid={voter_id}&offset={offset}",
                          encoding="utf-8")


# iterate through members
for member in members:
    # report member to be scraped
    print(f"Get votes for {member['lastnamefirst']}")

    # scrape this member's page (not the first member's) and attach the votes
    member["votes"] = scrape_votes(member)

Get votes for Gauland, Dr. Alexander
Get votes for Seestern-Pauly, Matthias
Get votes for Grosse-Brömer, Michael
Get votes for Schönberger, Marlene
Get votes for Nasr, Rasha
Get votes for Glaser, Albrecht
Get votes for Seiter, Prof. Dr. Stephan
Get votes for Grübel, Markus
Get votes for Schröder, Christina-Johanne
Get votes for Nickholz, Brian
Get votes for Gnauck, Hannes
Get votes for Semet, Rainer
Get votes for Grütters, Prof. Monika
Get votes for Schulz-Asche, Kordula
Get votes for Nietan, Dietmar
Get votes for Gottschalk, Kay
Get votes for Skudelny, Judith
Get votes for Grund, Manfred
Get votes for Sekmen, Melis
Get votes for Nürnberger, Jörg
Get votes for Harder-Kühnel, Mariana Iris
Get votes for Stark-Watzinger, Bettina
Get votes for Grundmann, Oliver
Get votes for Slawik, Nyke
Get votes for Oehl, Lennard
Get votes for Haug, Jochen
Get votes for Stockmeier, Konrad
Get votes for Güler, Serap
Get votes for Spallek, Dr. Anne Monika
Get votes for Özdemir (Duisburg), Mahmut
Get votes 

In [28]:
# Persist the scraped data with pickle, but only after explicit confirmation,
# because an existing "voting_data.pkl" is overwritten.
confirmation = input("Are you sure, you want to overwrite the data file? (Yes|No)")

# anything other than the exact answer "Yes" leaves the file untouched
if confirmation == "Yes":
    with open("voting_data.pkl", "wb") as data_file:
        pickle.dump(members, data_file)