# Convert the data to JSON files that can be used in the visualisation

In [32]:
import pandas as pd
import numpy as np
from pathlib import Path
import itertools
import json

**Load the data**

In [33]:
# Both input tables live in the same directory, relative to this notebook.
data_dir = Path('../Daten_Thurgau')
file_abg = data_dir / 'Mitglieder_GR_2008_2021.csv'            # member table
file_vor = data_dir / 'Vorstoesse_GR_ab_2012_Themen_def.csv'   # motion (Vorstoss) table

In [34]:
# Read both tables; the files use ';' as the field separator.
abg = pd.read_csv(file_abg, sep=';')
vor = pd.read_csv(file_vor, sep=';')

**Nodes**

In [50]:
# --- Motions (Vorstösse) ------------------------------------------------------
# Keep every row from the first one whose submission date ends in '2012' onwards.
# NOTE(review): this is a positional cut, so it assumes the rows are in
# chronological order and that at least one 2012 row exists — confirm against the CSV.
first_2012 = vor.loc[vor['DATUM_VORSTOSS_EINGANG'].str[-4:] == '2012'].index[0]
vor_2012 = vor[first_2012:]
# Exclude Initiativen (type code 6): they do not come from the parliamentarians
# and thus do not have any signers.
vor_2012 = vor_2012[vor_2012['VORSTOSSART_CODE'] != 6]
# Some motions are listed twice (once before and once after the decision);
# keep only the last entry of each.
vor_2012 = vor_2012.drop_duplicates(subset=['GRG_NR', 'DATUM_VORSTOSS_EINGANG'], keep='last')
# Frame with only the signer-name columns (VORSTOESSER_IN_1 .. VORSTOESSER_IN_10).
vor_untz = vor_2012[[f'VORSTOESSER_IN_{i}' for i in range(1, 11)]]

# --- Members ------------------------------------------------------------------
abg_2012 = abg.loc[abg['Jahr'] >= 2012]
# Columns that identify one person in the (mostly) unique list of parliamentarians.
person_key = ['Name', 'Vorname', 'Geschlecht']
abg_list = (abg_2012
            .drop(['Geb_Jahr', 'Beruf', 'Fraktion', 'GR_Eintritt', 'Ort', 'Bezirk'], axis=1)
            .drop_duplicates(ignore_index=True))
# Years of service = number of DISTINCT years a person appears in. Counting rows
# instead would over-count anyone who has two rows in the same year that differ
# in one of the remaining columns (e.g. a party change within a year).
abg_list_years = abg_list.groupby(person_key)['Jahr'].nunique().reset_index(name='Dienstjahre')
# For those who changed party (only 3 since 2012) take the last entry.
abg_list = abg_list.drop('Jahr', axis=1).drop_duplicates(subset=person_key, keep='last', ignore_index=True)
# Merge explicitly on the person key rather than on whatever columns happen to overlap.
abg_list = abg_list.merge(abg_list_years, on=person_key)

print(f'Wir haben {len(abg_list)} unabhängige Abgeordnete seit 2012')

Wir haben 264 unabhängige Abgeordnete seit 2012


In [51]:
# Build one node per parliamentarian, carrying the list of motions they signed.
#
# The motions are grouped by signer name in a single pass over `vor_2012`
# instead of re-scanning every motion for every member; the per-member order of
# motions still follows the row order of `vor_2012`.
topic_cols = ['Thema_1', 'Thema_2', 'Thema_3']
vorstoesse_by_name = {}  # signer name -> list of motion records
for v in vor_2012.index:
    row = vor_2012.loc[v]
    # set(): a name listed twice on the same motion must be counted only once
    for signer in set(vor_untz.loc[v].dropna()):
        vorstoesse_by_name.setdefault(signer, []).append(
            dict(id=int(v),  # plain int so that json can serialise the id
                 Name=row['VORSTOSS_BEZEICHNUNG'],
                 Type=int(row['VORSTOSSART_CODE']),
                 Thema=list(row[topic_cols].dropna())))

nodes = []
for i in abg_list.index:
    # Signers are matched by their full name, "<Vorname> <Name>".
    full_name = abg_list.at[i, 'Vorname'] + ' ' + abg_list.at[i, 'Name']
    nodes.append(dict(id=i,
                      Name=full_name,
                      Geschlecht=abg_list.at[i, 'Geschlecht'],
                      Partei=abg_list.at[i, 'Partei'],
                      Dienstjahre=int(abg_list.at[i, 'Dienstjahre']),
                      # copy, so members sharing a full name do not share one list
                      Vorstösse=list(vorstoesse_by_name.get(full_name, []))))

# Lookup from full name to node id; if two members share a full name the later one wins.
name_to_id = {n['Name']: n['id'] for n in nodes}

**Links**

In [52]:
# Read out the signers of every motion and collect, for each pair of co-signers,
# the motions they signed together.
#
# Result: `links_list` is a list of `[[source_id, target_id], [motion, ...]]`
# entries, in order of first appearance of each pair. It is an empty list when
# no motion has two or more signers.
topic_cols = ['Thema_1', 'Thema_2', 'Thema_3']
# (source_id, target_id) -> motions signed together. A dict gives a constant-time
# "pair already seen?" check and keeps insertion order.
links_by_pair = {}
for vor_idx in vor_untz.index:
    # Sort the names so that every connection appears only once ((a, b) vs (b, a)).
    signers = sorted(vor_untz.loc[vor_idx].dropna())
    if len(signers) < 2:
        continue  # a single signer (or none) produces no link
    row = vor_2012.loc[vor_idx]
    vorstoss = dict(id=int(vor_idx),  # plain int so that json can serialise the id
                    Name=row['VORSTOSS_BEZEICHNUNG'],
                    Type=int(row['VORSTOSSART_CODE']),
                    Thema=list(row[topic_cols].dropna()))
    # All unordered combinations of two signers of this motion.
    for name_a, name_b in itertools.combinations(signers, 2):
        # Convert names to node ids.
        pair = (name_to_id[name_a], name_to_id[name_b])
        # Every link gets its own copy of the motion record.
        links_by_pair.setdefault(pair, []).append(
            dict(vorstoss, Thema=list(vorstoss['Thema'])))

links_list = [[list(pair), shared] for pair, shared in links_by_pair.items()]

In [53]:
# Turn every [[source_id, target_id], motions] entry into a source/target/value record.
links = [dict(source=pair[0], target=pair[1], value=shared) for pair, shared in links_list]

**Save json file**

In [54]:
# Bundle nodes and links into one object and write it out as JSON.
netzwerk = {'nodes': nodes, 'links': links}
with open("../Daten_Thurgau/netzwerk.json", "w") as file:
    file.write(json.dumps(netzwerk))

In [55]:
# Distinct topics over the three topic columns, sorted, written as name/value records.
topic_values = pd.concat([vor_2012[col] for col in ('Thema_1', 'Thema_2', 'Thema_3')]).dropna()
subjects = [{'name': topic, 'value': topic} for topic in sorted(set(topic_values))]
with open("../Daten_Thurgau/subjects.json", "w") as file:
    file.write(json.dumps(subjects))

In [3]:
import seaborn as sns
# Hex codes of the 10-step 'Blues' palette (shown as the cell output).
sns.color_palette('Blues', n_colors=10).as_hex()

In [29]:
# Hex codes of the 10-step 'Purples_d' palette, printed as a plain list.
purples_hex = sns.color_palette('Purples_d', n_colors=10).as_hex()
print(purples_hex)

['#b8b7d9', '#aaa7d0', '#9c97c8', '#8e86bf', '#8076b7', '#7269a7', '#665e90', '#595378', '#4c4861', '#403e4a']


In [2]:
# Hex codes of the 10-step 'Blues_d' palette, printed as a plain list.
blues_hex = sns.color_palette('Blues_d', n_colors=10).as_hex()
print(blues_hex)

['#96c3df', '#81b5d8', '#6ca8d1', '#569bca', '#418ec4', '#377fb4', '#36709a', '#35607f', '#345166', '#34424c']


In [57]:
# Distinct values of the 'Ort' column among the members since 2012.
abg_2012.loc[:, 'Ort'].drop_duplicates()

3                 Arbon
15           Frauenfeld
33           Lanterswil
45            Eschlikon
46             Dussnang
             ...       
1731    St. Margarethen
1735         Weingarten
1744            Mettlen
1766          Birwinken
1772            Braunau
Name: Ort, Length: 105, dtype: object

In [58]:
# Distinct values of the 'Bezirk' column among the members since 2012.
abg_2012.loc[:, 'Bezirk'].drop_duplicates()

3             Arbon
15       Frauenfeld
33       Weinfelden
45       Münchwilen
57     Bischofszell
65      Kreuzlingen
143       Steckborn
333    Diessenhofen
Name: Bezirk, dtype: object