In [1]:
# load requirements
import pandas as pd
import xml.etree.ElementTree as ET
from atproto import Client

In [4]:
# read the XML file into an element tree and keep a reference to its root node
xml_path = '../data/raw_data/MDB_STAMMDATEN.XML'
tree = ET.parse(xml_path)
root = tree.getroot()

In [5]:
# collect the numbers of all election periods (WP) that occur in the file
# as a set and take the largest one as the latest election period
all_phases = set()
for mdb in root.findall('MDB'):
    for wp in mdb.findall('.//WAHLPERIODE'):
        # findtext returns None for a missing <WP> and '' for an empty one;
        # both are skipped instead of crashing int()
        wp_text = (wp.findtext('WP') or '').strip()
        if wp_text.isdecimal():
            all_phases.add(int(wp_text))

# fail with a readable message instead of max()'s generic empty-sequence error
if not all_phases:
    raise ValueError("no WAHLPERIODE/WP entries found below the MDB elements")
latest_wp = max(all_phases)

In [6]:
# create the df by looping over all members

# column order of the resulting df; passing it explicitly keeps the columns
# (and therefore the sort below) intact even if no member matches
MP_COLUMNS = [
    "id", "full_name", "birthdate", "gender", "party", "profession",
    "district_number", "district_name", "mandate",
]


def _child_text(element, tag):
    """Return the text of the first ``tag`` below ``element``, or None if either is missing."""
    if element is None:
        return None
    return element.findtext(tag)


def _join_name(*parts):
    """Join the non-empty name parts with single spaces (missing parts are dropped)."""
    cleaned = (part.strip() for part in parts if part)
    return " ".join(part for part in cleaned if part)


def _election_period(wp):
    """Return the WP number of a WAHLPERIODE element as int, or None if absent/non-numeric."""
    wp_text = (wp.findtext("WP") or "").strip()
    return int(wp_text) if wp_text.isdecimal() else None


# create empty list to store the data
data = []

# now loop over all members and store relevant information
for mdb in root.findall("MDB"):

    # extract general information about the mp
    mdb_id = mdb.findtext("ID")
    # NOTE(review): only the first NAME element is used; if a member can have
    # several NAME entries, confirm that the first one is the one wanted here
    name_element = mdb.find(".//NAME")
    first_name = _child_text(name_element, "VORNAME")
    surname = _child_text(name_element, "NACHNAME")
    acad_title = _child_text(name_element, "AKAD_TITEL")
    # a missing part must not end up as the literal text "None" in the name
    full_name = _join_name(acad_title, first_name, surname)
    bio_element = mdb.find("BIOGRAFISCHE_ANGABEN")
    birthdate = _child_text(bio_element, "GEBURTSDATUM")
    gender = _child_text(bio_element, "GESCHLECHT")
    party = _child_text(bio_element, "PARTEI_KURZ")
    profession = _child_text(bio_element, "BERUF")

    # check for participation in latest election period and append data if fits
    for wp in mdb.findall(".//WAHLPERIODE"):
        if _election_period(wp) != latest_wp:
            continue
        # extract information about the specific election
        data.append({
            "id": mdb_id,
            "full_name": full_name,
            "birthdate": birthdate,
            "gender": gender,
            "party": party,
            "profession": profession,
            "district_number": wp.findtext("WKR_NUMMER"),
            "district_name": wp.findtext("WKR_NAME"),
            "mandate": wp.findtext("MANDATSART"),
        })

# convert this to a df and show the first rows
mps_df = pd.DataFrame(data, columns=MP_COLUMNS).sort_values(by=["party", "full_name"])
mps_df.head()

Unnamed: 0,id,full_name,birthdate,gender,party,profession,district_number,district_name,mandate
512,11005504,Achim Köhler,22.03.1964,männlich,AfD,,277.0,Rhein-Neckar,Landesliste
416,11005408,Adam Balten,09.11.1983,männlich,AfD,Mechatronik-Ingenieur,112.0,Wesel I,Landesliste
412,11005404,Alexander Arpaschi,24.05.1970,männlich,AfD,,273.0,Rastatt,Landesliste
629,11005622,Alexis L. Giersch,25.09.1963,männlich,AfD,,,,Landesliste
169,11004674,Andreas Bleck,17.04.1988,männlich,AfD,Büroleiter,196.0,Neuwied,Landesliste


In [7]:
# create a client instance
client = Client()

# get the app password from a local file (kept out of the notebook itself);
# strip surrounding whitespace so that the trailing newline most editors add
# to the file is not sent as part of the password
with open("app_password.txt", "r") as f:
    app_password = f.read().strip()

handle = "mxwlnd.bsky.social"

# login with my credentials
client.login(handle, app_password)

ProfileViewDetailed(did='did:plc:5sqqg66p7muc7ogbp6xx4sw6', handle='mxwlnd.bsky.social', associated=ProfileAssociated(chat=None, feedgens=0, labeler=False, lists=0, starter_packs=0, py_type='app.bsky.actor.defs#profileAssociated'), avatar='https://cdn.bsky.app/img/avatar/plain/did:plc:5sqqg66p7muc7ogbp6xx4sw6/bafkreigwrjedzb7jvmowkn6fbe2atbnlwecsa4ouk5wpz54eg6rqkvayrq@jpeg', banner=None, created_at='2025-05-19T19:28:35.738Z', description=None, display_name='', followers_count=2, follows_count=1, indexed_at='2025-05-19T19:28:35.738Z', joined_via_starter_pack=None, labels=[], pinned_post=None, posts_count=0, verification=None, viewer=ViewerState(blocked_by=False, blocking=None, blocking_by_list=None, followed_by=None, following=None, known_followers=None, muted=False, muted_by_list=None, py_type='app.bsky.actor.defs#viewerState'), py_type='app.bsky.actor.defs#profileViewDetailed')

In [9]:
# define a function that gives me a potential bsky handle
def find_first_handle(name, api_client=None):
    """Return the handle of the first Bluesky account found for ``name``.

    The result is only a candidate: the first search hit is taken as-is and
    is not checked against the person the name belongs to.

    Parameters
    ----------
    name : str
        Search term sent to the actor search (here: the full name of an MP).
    api_client : optional
        Object exposing ``app.bsky.actor.search_actors``. Defaults to the
        notebook-level ``client``.

    Returns
    -------
    str
        Handle of the first search hit, or "" if ``name`` is blank or not a
        string, or if the search returns no actors.
    """
    # nothing meaningful to search for (e.g. NaN or an empty name)
    if not isinstance(name, str) or not name.strip():
        return ""

    # fall back to the logged-in notebook client only when none is passed in
    if api_client is None:
        api_client = client

    response = api_client.app.bsky.actor.search_actors({'term': name.strip()})
    actors = response["actors"]
    return actors[0]["handle"] if actors else ""

# look up every MP name and store the result in a new column
name_column = mps_df["full_name"]
mps_df["potential_handle"] = name_column.map(find_first_handle)

In [11]:
mps_df.head(20)

Unnamed: 0,id,full_name,birthdate,gender,party,profession,district_number,district_name,mandate,potential_handle
512,11005504,Achim Köhler,22.03.1964,männlich,AfD,,277.0,Rhein-Neckar,Landesliste,
416,11005408,Adam Balten,09.11.1983,männlich,AfD,Mechatronik-Ingenieur,112.0,Wesel I,Landesliste,
412,11005404,Alexander Arpaschi,24.05.1970,männlich,AfD,,273.0,Rastatt,Landesliste,
629,11005622,Alexis L. Giersch,25.09.1963,männlich,AfD,,,,Landesliste,
169,11004674,Andreas Bleck,17.04.1988,männlich,AfD,Büroleiter,196.0,Neuwied,Landesliste,
532,11005524,Andreas Mayer,25.01.1995,männlich,AfD,Luft- und Raumfahrttechnik-Ingenieur,253.0,Donau-Ries,Landesliste,qimmuno.bsky.social
548,11005540,Andreas Paul,10.10.1978,männlich,AfD,Offizier in der Bundeswehr,27.0,Oldenburg – Ammerland,Landesliste,andimicroscopy.bsky.social
567,11005559,Angela Rudzka,20.01.1984,weiblich,AfD,Referentin der AfD Fraktion im Landtag Nieders...,49.0,Salzgitter – Wolfenbüttel,Landesliste,
557,11005549,Arne Raue,18.08.1970,männlich,AfD,,60.0,BRB a.d.Havel– Potsdam-MittelmarkI –HVL III–Te...,Direktwahl,
603,11005596,Bastian Treuheit,17.01.1998,männlich,AfD,Kaufmann im Groß- und Einzelhandel,242.0,Fürth,Landesliste,


In [12]:
# write the df (including its row index, the to_csv default) to a csv file
output_path = "../data/raw_data/bundestag_mps_potential_handle.csv"
mps_df.to_csv(output_path, index=True)