In [41]:
import pandas as pd
from scipy.spatial.distance import cosine
import numpy as np
import json

In [42]:
# Election results table: one row per `Partei` entry and one column per
# election year (see the preview below).
results_csv = '../../../../Data/Results/Bundestagswahlen.csv'
results_df = pd.read_csv(results_csv)
results_df.head()

Unnamed: 0,Partei,1949,1953,1957,1961,1965,1969,1972,1976,1980,1983,1987,1990,1994,1998,2002,2005,2009,2013,2017
0,CDUCSU,3101.0,4517.0,5019.0,4532.0,4759.0,4609.0,4486.0,4863.0,4454.0,4879.0,4426.0,4382,4143,3514,3851,3517,338,4155,3293
1,SPD,2922.0,2884.0,3175.0,3622.0,3928.0,4267.0,4585.0,4256.0,4286.0,3818.0,3704.0,3346,3639,4093,3852,3425,2303,2574,2051
2,FDP,1192.0,954.0,771.0,1277.0,949.0,577.0,836.0,792.0,1062.0,695.0,909.0,1103,692,625,737,983,1456,476,1075
3,Gruene,,,,,,,,,15.0,557.0,826.0,505,727,67,856,812,1071,844,894
4,Linke,,,,,,,,,,,,243,439,51,399,871,1189,859,924


In [43]:
# Load the manifesto table and keep only the rows whose `type` column is 'b'.
docsums_h5 = '../../../../cleaned_manifestos_with_sums.h5'
docsums_df = pd.read_hdf(docsums_h5).loc[lambda frame: frame['type'] == 'b']
docsums_df.head()

Unnamed: 0,year,participation,party_name,result,text,type,doc_sum
0,1949,78.49,CDUCSU,31.01,"[düsseldorfer, leitsätze, juli, wirtschaftlich...",b,"[275.59113873868773, 1290.0181675469503, 612.0..."
1,1949,78.49,FDP,11.92,"[bremer, plattform, beschlüsse, bundesparteita...",b,"[381.04226378997555, 1346.8178863117355, 693.7..."
2,1949,78.49,Linke,,"[einheit, frieden, wohlstand, wahlprogramm, kp...",b,"[120.08180182031356, 394.1816535771359, 220.88..."
3,1949,78.49,SPD,29.22,"[frei, deutschland, wählerinnen, wähler, augus...",b,"[104.58376377634704, 354.48599992343225, 185.6..."
4,1953,85.99,CDUCSU,45.17,"[hamburger, programm, april, deutsch, bundesta...",b,"[143.5875440903401, 739.6516372366459, 383.610..."


In [52]:
# Governing coalitions: a single row whose year columns each hold a
# ';'-separated list of party names (e.g. 'CDUCSU;FDP').
coalition_csv = '../../../../Data/Koalitionen.csv'
coal_df = pd.read_csv(coalition_csv)
coal_df

Unnamed: 0,1949,1953,1957,1961,1965,1969,1972,1976,1980,1983,1987,1990,1994,1998,2002,2005,2009,2013,2017
0,CDUCSU;FDP,CDUCSU;FDP,CDUCSU,CDUCSU;FDP,CDUCSU;SPD,SPD;FDP,SPD;FDP,SPD;FDP,SPD;FDP,CDUCSU;FDP,CDUCSU;FDP,CDUCSU;FDP,CDUCSU;FDP,SPD;Gruene,SPD;Gruene,CDUCSU;SPD,CDUCSU;FDP,CDUCSU;SPD,CDUCSU;SPD


In [54]:
# Spot-check one year: `.values` is a one-element array holding the ';'-joined coalition string.
coal_df['1949'].values

array(['CDUCSU;FDP'], dtype=object)

In [55]:
# Distinct party names that occur in the filtered manifesto table.
parties = docsums_df['party_name'].unique()

In [56]:
import itertools

In [57]:
# Preview every unordered pair of parties as generated by itertools.
for comb in itertools.combinations(parties, 2):
    print(comb)

('CDUCSU', 'FDP')
('CDUCSU', 'Linke')
('CDUCSU', 'SPD')
('CDUCSU', 'Gruene')
('CDUCSU', 'AFD')
('FDP', 'Linke')
('FDP', 'SPD')
('FDP', 'Gruene')
('FDP', 'AFD')
('Linke', 'SPD')
('Linke', 'Gruene')
('Linke', 'AFD')
('SPD', 'Gruene')
('SPD', 'AFD')
('Gruene', 'AFD')


In [58]:
# Hand-ordered party pairs. The order of the list and the orientation of each
# pair are significant: the export loop builds its labels as
# "<first> - <second>" and emits them in exactly this sequence.
combinations = [
    ('CDUCSU', 'SPD'), ('CDUCSU', 'FDP'), ('CDUCSU', 'Linke'),
    ('Linke', 'SPD'), ('FDP', 'SPD'), ('Gruene', 'SPD'),
    ('FDP', 'AFD'), ('FDP', 'Linke'), ('AFD', 'Gruene'),
    ('CDUCSU', 'Gruene'), ('FDP', 'Gruene'), ('Linke', 'Gruene'),
    ('Linke', 'AFD'), ('AFD', 'SPD'), ('AFD', 'CDUCSU'),
]

In [59]:
# Distinct values of the manifesto table's `year` column; the export loop iterates over these.
years = docsums_df.year.unique()

In [64]:
def _parse_result(raw):
    """Convert one raw cell of the election-results table to a float.

    Strings are parsed after replacing a decimal comma with a dot. Missing
    values (None / NaN) become None so they serialise as JSON null. Values
    that were already parsed as numbers are kept as floats instead of being
    silently discarded.
    """
    if isinstance(raw, str):
        return float(raw.replace(',', '.'))
    if raw is None or pd.isna(raw):
        return None
    return float(raw)


def _manifesto_distance(year_df, party_a, party_b):
    """Return the cosine distance between two parties' `doc_sum` vectors.

    `year_df` is the manifesto table restricted to a single year. When either
    party has no row in it, None is returned. If a party has several rows,
    the first one is used.
    """
    rows_a = year_df[year_df.party_name == party_a]
    rows_b = year_df[year_df.party_name == party_b]
    if rows_a.empty or rows_b.empty:
        return None
    return float(cosine(rows_a.doc_sum.values[0], rows_b.doc_sum.values[0]))


# Build one record per year holding the election results and, for every
# configured party pair, the manifesto distance and whether both parties
# were part of that year's coalition.
df = docsums_df
data = []
for year in years:
    year_key = str(year)  # results/coalition tables use string column labels
    filtered_df = df[df.year == year]

    # The coalition cell is a ';'-separated list of party names; split it
    # once per year rather than twice for every pair.
    coalition_parties = set(coal_df[year_key].values[0].split(';'))

    distances = []
    for party_a, party_b in combinations:
        distances.append({
            'label': party_a + ' - ' + party_b,
            'distance': _manifesto_distance(filtered_df, party_a, party_b),
            'coalition': party_a in coalition_parties and party_b in coalition_parties,
        })

    # Pair party names and results positionally so the lookup does not
    # depend on the frame's index labels.
    results = [
        {'party': party, 'result': _parse_result(result), 'year': int(year)}
        for party, result in zip(results_df.Partei, results_df[year_key])
    ]

    # int(year) turns the year label into a plain int before json.dump.
    data.append({'year': int(year), 'results': results, 'distances': distances})

In [65]:
# Inspect the nested per-year structure before it is written to disk.
data

[{'year': 1949,
  'results': [{'party': 'CDUCSU', 'result': 31.01, 'year': 1949},
   {'party': 'SPD', 'result': 29.22, 'year': 1949},
   {'party': 'FDP', 'result': 11.92, 'year': 1949},
   {'party': 'Gruene', 'result': None, 'year': 1949},
   {'party': 'Linke', 'result': None, 'year': 1949},
   {'party': 'AFD', 'result': None, 'year': 1949},
   {'party': 'Wahlbeteiligung', 'result': 78.49, 'year': 1949}],
  'distances': [{'label': 'CDUCSU - SPD',
    'distance': 0.010872261039398312,
    'coalition': False},
   {'label': 'CDUCSU - FDP',
    'distance': 0.011398498104587595,
    'coalition': True},
   {'label': 'CDUCSU - Linke',
    'distance': 0.020155511330913756,
    'coalition': False},
   {'label': 'Linke - SPD',
    'distance': 0.004633280232016257,
    'coalition': False},
   {'label': 'FDP - SPD',
    'distance': 0.004838361337338282,
    'coalition': False},
   {'label': 'Gruene - SPD', 'distance': None, 'coalition': False},
   {'label': 'FDP - AFD', 'distance': None, 'coalitio

In [66]:
# Write the per-year distance/result structure to a JSON file.
with open('d3_distance_result_data2.json', 'w') as json_file:
    json.dump(obj=data, fp=json_file)


In [7]:
# Keep only the election-year columns. The table also carries a `Partei`
# column (and possibly an unnamed index column); the cells below call
# int(year) on every entry, which would raise ValueError for those labels.
years = [column for column in results_df.columns.unique() if str(column).isdigit()]

In [34]:
def _parse_result(raw):
    """Convert one raw cell of the election-results table to a float.

    Strings are parsed after replacing a decimal comma with a dot. Missing
    values (None / NaN) become None so they serialise as JSON null. Values
    that were already parsed as numbers are kept as floats instead of being
    silently discarded. Defined in this cell so the cell is self-contained.
    """
    if isinstance(raw, str):
        return float(raw.replace(',', '.'))
    if raw is None or pd.isna(raw):
        return None
    return float(raw)


# Flatten the results table into one record per (year, party). Party names
# and values are paired positionally so the lookup does not depend on the
# frame's index labels.
results = [
    {'party': party, 'result': _parse_result(result), 'year': int(year)}
    for year in years
    for party, result in zip(results_df.Partei, results_df[str(year)])
]

In [35]:
# Inspect the flat list of per-party, per-year result records.
results

[{'party': 'CDUCSU', 'result': 31.01, 'year': 1949},
 {'party': 'SPD', 'result': 29.22, 'year': 1949},
 {'party': 'FDP', 'result': 11.92, 'year': 1949},
 {'party': 'Gruene', 'result': None, 'year': 1949},
 {'party': 'Linke', 'result': None, 'year': 1949},
 {'party': 'AFD', 'result': None, 'year': 1949},
 {'party': 'Wahlbeteiligung', 'result': 78.49, 'year': 1949},
 {'party': 'CDUCSU', 'result': 45.17, 'year': 1953},
 {'party': 'SPD', 'result': 28.84, 'year': 1953},
 {'party': 'FDP', 'result': 9.54, 'year': 1953},
 {'party': 'Gruene', 'result': None, 'year': 1953},
 {'party': 'Linke', 'result': None, 'year': 1953},
 {'party': 'AFD', 'result': None, 'year': 1953},
 {'party': 'Wahlbeteiligung', 'result': 85.99, 'year': 1953},
 {'party': 'CDUCSU', 'result': 50.19, 'year': 1957},
 {'party': 'SPD', 'result': 31.75, 'year': 1957},
 {'party': 'FDP', 'result': 7.71, 'year': 1957},
 {'party': 'Gruene', 'result': None, 'year': 1957},
 {'party': 'Linke', 'result': None, 'year': 1957},
 {'party': '

In [36]:
# Show the year labels converted to plain ints.
[int(label) for label in years]

[1949,
 1953,
 1957,
 1961,
 1965,
 1969,
 1972,
 1976,
 1980,
 1983,
 1987,
 1990,
 1994,
 1998,
 2002,
 2005,
 2009,
 2013,
 2017]

In [37]:
# Bundle the integer year list with the flat result records for export.
results_obj = {
    'years': [int(label) for label in years],
    'results': results,
}

In [38]:
# Inspect the export payload before it is written to disk.
results_obj

{'years': [1949,
  1953,
  1957,
  1961,
  1965,
  1969,
  1972,
  1976,
  1980,
  1983,
  1987,
  1990,
  1994,
  1998,
  2002,
  2005,
  2009,
  2013,
  2017],
 'results': [{'party': 'CDUCSU', 'result': 31.01, 'year': 1949},
  {'party': 'SPD', 'result': 29.22, 'year': 1949},
  {'party': 'FDP', 'result': 11.92, 'year': 1949},
  {'party': 'Gruene', 'result': None, 'year': 1949},
  {'party': 'Linke', 'result': None, 'year': 1949},
  {'party': 'AFD', 'result': None, 'year': 1949},
  {'party': 'Wahlbeteiligung', 'result': 78.49, 'year': 1949},
  {'party': 'CDUCSU', 'result': 45.17, 'year': 1953},
  {'party': 'SPD', 'result': 28.84, 'year': 1953},
  {'party': 'FDP', 'result': 9.54, 'year': 1953},
  {'party': 'Gruene', 'result': None, 'year': 1953},
  {'party': 'Linke', 'result': None, 'year': 1953},
  {'party': 'AFD', 'result': None, 'year': 1953},
  {'party': 'Wahlbeteiligung', 'result': 85.99, 'year': 1953},
  {'party': 'CDUCSU', 'result': 50.19, 'year': 1957},
  {'party': 'SPD', 'result

In [39]:
# Write the years/results payload to a JSON file.
with open('d3_result_data.json', 'w') as json_file:
    json.dump(obj=results_obj, fp=json_file)