In [1]:
import pandas as pd
from collections import defaultdict, Counter
import utils
import os
from create_network import get_fraction_color
from tqdm import tqdm

In [2]:
# Root directory under which one sub-folder per legislative period is created
# for the exported network files.
BASE_PATH = 'graphs/Bundestag/'

In [3]:
# Load the roll-call voting records from the Stata export; judging by the
# preview, each row is one MP's behaviour in one vote (TODO confirm).
data = pd.read_stata('data/BTVote/voting_behavior_V2_19492021.dta')
data

Unnamed: 0,id_de_parliament,lastname,firstname,party_det,ppg,party_text,elecper,office_spell,vote_id,vote_date,vote_beh,vote_deviate,mp_id_old,pers_id_pdbd,id_de_manow
0,11000001.0,Abelein,Manfred,CDU (Christian Democratic Union),CDU/CSU,CDU,11,1.0,11106.0,1988-11-25,no,no deviation,10.0,DE_Abelein_Manfred_1930,10
1,11000001.0,Abelein,Manfred,CDU (Christian Democratic Union),CDU/CSU,CDU,8,1.0,8028.0,1978-02-16,no,no deviation,10.0,DE_Abelein_Manfred_1930,10
2,11000001.0,Abelein,Manfred,CDU (Christian Democratic Union),CDU/CSU,CDU,9,1.0,9002.0,1981-03-18,no,no deviation,10.0,DE_Abelein_Manfred_1930,10
3,11000001.0,Abelein,Manfred,CDU (Christian Democratic Union),CDU/CSU,CDU,10,1.0,10051.0,1985-06-21,no,no deviation,10.0,DE_Abelein_Manfred_1930,10
4,11000001.0,Abelein,Manfred,CDU (Christian Democratic Union),CDU/CSU,CDU,11,1.0,11194.0,1990-04-26,unexcused absent,unexcused absent,10.0,DE_Abelein_Manfred_1930,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435244,11004971.0,Jäger,Florian,AfD (Alternative für Deutschland),AfD,AfD,19,1.0,19242.0,2021-08-25,yes,weak deviation,,,
1435245,11004971.0,Jäger,Florian,AfD (Alternative für Deutschland),AfD,AfD,19,1.0,19243.0,2021-08-25,no,no deviation,,,
1435246,11004972.0,Gökhan,Zeki,"Left/PDS (The Left, previously Party of Democr...",Left/PDS,Linke,19,1.0,19243.0,2021-08-25,no,no deviation,,,
1435247,11004972.0,Gökhan,Zeki,"Left/PDS (The Left, previously Party of Democr...",Left/PDS,Linke,19,1.0,19242.0,2021-08-25,no,weak deviation,,,


In [4]:
# Election periods (column `elecper`) present in the data set.
eps = data['elecper'].unique()
eps

array([11,  8,  9, 10,  6,  7,  5,  3,  4,  2,  1, 12, 13, 14, 16, 15, 17,
       18, 19], dtype=int8)

In [45]:
ep = 15  # election period to analyse; should be one of the values in `eps`

In [46]:
# Restrict the records to the chosen election period and list the voting
# behaviours that actually occur in it.
data_ep = data[data['elecper'] == ep]
data_ep['vote_beh'].unique()

['yes', 'no', 'abstain', 'unexcused absent', 'excused absence']
Categories (7, object): ['excused absence' < 'yes' < 'no' < 'abstain' < 'unexcused absent' < 'invalid vote' < 'voting behavior not/wrongly protocolled']

In [47]:
# Distinct vote ids of the period; `vote_id` is stored as float, hence the cast.
poll_ids = data_ep['vote_id'].unique().astype(int)

In [48]:
# Distinct MP ids of the period; `id_de_parliament` is stored as float, hence the cast.
MD_ids = data_ep['id_de_parliament'].unique().astype(int)

In [49]:
# Distinct `ppg` values (party group labels such as 'CDU/CSU') in the chosen period.
Parties = data_ep['ppg'].unique()
Parties

['SPD', 'CDU/CSU', 'FDP', 'Greens']
Categories (7, object): ['SPD' < 'CDU/CSU' < 'FDP' < 'Greens' < 'Left/PDS' < 'other' < 'AfD']

In [50]:
def mdid_to_name(mdid, data_ep):
    """Return the display name ("<firstname> <lastname>") of an MP.

    The name is read from the first row of ``data_ep`` whose
    ``id_de_parliament`` equals ``mdid``. An ``IndexError`` is raised when no
    row matches.
    """
    first_match = data_ep.loc[data_ep['id_de_parliament'] == mdid].iloc[0]
    return "{} {}".format(first_match['firstname'], first_match['lastname'])

def mdid_to_party(mdid, data_ep):
    """Return the ``ppg`` value of an MP.

    The value is taken from the first row of ``data_ep`` whose
    ``id_de_parliament`` equals ``mdid``. An ``IndexError`` is raised when no
    row matches.
    """
    is_member = data_ep['id_de_parliament'] == mdid
    return data_ep[is_member].iloc[0]['ppg']

In [51]:
# Lookup tables keyed by MP id: display name and `ppg` value of every MP in
# the chosen election period.
MD_names = {}
MD_parties = {}
for mdid in MD_ids:
    MD_names[mdid] = mdid_to_name(mdid, data_ep)
    MD_parties[mdid] = mdid_to_party(mdid, data_ep)

In [52]:
# Election periods start at 1 while the sequence index starts at 0.
# NOTE(review): presumably ELEC_YEARS holds the election year of each period — verify in utils.
leg_year = utils.ELEC_YEARS[ep-1]

In [53]:
# Output location: <BASE_PATH><period>/network<period>.net; the period folder
# is created on demand.
period = utils.year_to_period(leg_year)
period_dir = BASE_PATH + period + '/'
filepath = period_dir + 'network' + period + '.net'
os.makedirs(period_dir, exist_ok=True)

In [54]:
# Build the co-voting network of the chosen election period and write it to
# `filepath` in a Pajek-style .net layout (`*vertices` / `*edges` sections).
#
# Two MPs are linked when they showed the same `vote_beh` in at least one
# poll; the edge weight is the share of the period's polls in which their
# behaviour matched, rounded to three decimals.
# NOTE(review): every value of `vote_beh` counts as agreement, including
# 'excused absence' and 'unexcused absent' — confirm that two absent MPs
# should really be treated as having voted alike.
vote_options = data_ep['vote_beh'].unique()

# SV[a][b] = number of polls in which MPs a and b behaved identically
# (stored symmetrically, i.e. SV[a][b] == SV[b][a]).
SV = {mdid: defaultdict(int) for mdid in MD_ids}
for pid in tqdm(poll_ids):
    poll_rows = data_ep[data_ep['vote_id'] == pid]
    # Voters of this poll, grouped by their voting behaviour.
    VC = {vo: [] for vo in vote_options}
    # Walk the two needed columns directly; DataFrame.iterrows() would build
    # a full Series for every row, which is needlessly slow here.
    for vote_beh, voter in zip(poll_rows['vote_beh'], poll_rows['id_de_parliament']):
        VC[vote_beh].append(int(voter))
    # Every unordered pair inside one behaviour group agreed in this poll.
    for voters in VC.values():
        for idx, vo1 in enumerate(voters):
            for vo2 in voters[idx + 1:]:
                SV[vo1][vo2] += 1
                SV[vo2][vo1] += 1

n_polls = len(poll_ids)
# Each undirected pair is present twice in SV; normalising to
# (lower id, higher id, weight) inside a set collapses the duplicates.
# Counts are only ever created by `+= 1`, so no zero-count filter is needed.
E = list({
    (min(midx, midy), max(midx, midy), round(cnt / n_polls, 3))
    for midx, row in SV.items()
    for midy, cnt in row.items()
})

# 1-based vertex number of every MP, in the order of `MD_ids`.
MI_LOOKUP = {mdid: number for number, mdid in enumerate(MD_ids, start=1)}

# MP names contain non-ASCII characters (e.g. umlauts), so pin the encoding
# instead of relying on the platform-dependent default.
with open(filepath, 'w', encoding='utf-8') as f:
    f.write('*vertices '+str(len(MD_ids))+'\n')
    for mdid, number in MI_LOOKUP.items():
        party = MD_parties[mdid]
        # Vertex line: number, name, party group, MP id, colour of the group.
        f.write(f'{number} "{MD_names[mdid]}" "{party}" "{mdid}" "{get_fraction_color(party)}"\n')
    f.write('*edges '+str(len(E))+'\n')
    for midx, midy, weight in E:
        f.write(f'{MI_LOOKUP[midx]} {MI_LOOKUP[midy]} {weight}\n')


    

100%|██████████| 102/102 [00:09<00:00, 10.99it/s]
