In [1]:
import pandas as pd
import json
import numpy as np

In [2]:
# Read the ESS extract from disk; the cleaning and feature cells below all
# start from this frame.
df = pd.read_csv(filepath_or_buffer='ess/ess.csv')

In [3]:
# Survey items that must hold a usable answer for a respondent to be kept.
columns_to_clean = [
    'lrscale', 'stfdem', 'stfeco', 'stfedu', 'stfgov', 'stfhlth', 'stflife',
    'trstep', 'trstplt', 'trstprl', 'trstprt', 'atchctr', 'atcherp', 'happy',
]

# Sentinel codes treated as "no usable answer" (presumably the survey's
# refusal / don't-know / no-answer codes plus a '*' placeholder -- TODO confirm
# against the codebook).
# NOTE(review): the integer codes only match numerically typed columns; if a
# column was parsed as text, '77' would not equal 77 -- verify df.dtypes.
values_to_remove = [77, 88, 99, '*']

# Build ONE row mask across all columns instead of re-filtering (and copying)
# the whole frame once per column. A row is dropped when ANY listed column
# holds a sentinel, which is exactly what the sequential per-column filtering
# produced; the surviving rows keep their original index labels.
has_invalid_answer = df[columns_to_clean].isin(values_to_remove).any(axis=1)
df = df[~has_invalid_answer]

In [5]:
# --- political leaning ------------------------------------------------------
# Bucket the left/right self-placement score into seven labelled categories.
# pd.cut uses right-closed intervals, so for integer scores these edges give:
#   0 -> far left, 1-2 -> left, 3-4 -> center left, 5 -> center,
#   6-7 -> center right, 8-9 -> right, 10 -> far right
# Scores outside (-1, 10] (or missing) become NaN.
# NOTE(review): the 'leaning_data' keys in combined_data.json are hyphenated
# ('far-left', 'center-left', ...) while these labels use spaces -- confirm
# that downstream consumers normalise one to the other.
bins = [-1, 0, 2, 4, 5, 7, 9, 10]
labels = ['far left', 'left', 'center left', 'center', 'center right', 'right', 'far right']
df['leaning'] = pd.cut(df['lrscale'], bins=bins, labels=labels)

# --- survey year --------------------------------------------------------------
# The first run of digits in 'name' is converted to int and mapped through
# version_year_dict (presumably survey round number -> year; TODO confirm).
# Numbers that are not keys of the dict map to NaN.
# The pattern is a raw string: '\d' inside a normal string literal is an
# invalid escape sequence and triggers a SyntaxWarning on Python 3.12+.
version_year_dict = {3: "2006", 4: "2008", 5: "2010", 6: "2012", 7: "2014", 8: "2016", 9: "2018", 10: "2020"}
df['year'] = df['name'].str.extract(r'(\d+)')[0].astype(int).map(version_year_dict)

# --- satisfaction -------------------------------------------------------------
# Row-wise mean of the six satisfaction items, rounded to a whole number.
satisfaction_columns = ['stfdem', 'stfeco', 'stfedu', 'stfgov', 'stfhlth', 'stflife']
df['satisfaction'] = df[satisfaction_columns].mean(axis=1).round(0)

# --- trust in national institutions -------------------------------------------
# Row-wise mean of the three national trust items, rounded to a whole number.
trust_columns = ['trstplt', 'trstprl', 'trstprt']
df['trust_country'] = df[trust_columns].mean(axis=1).round(0)

# --- trust in the EU ----------------------------------------------------------
# Single item, copied through unchanged.
df['trust_eu'] = df['trstep']

# The attachment items ('atchctr', 'atcherp') are cleaned earlier but are not
# exported at the moment; to include them, add 'attachment_country' /
# 'attachment_eu' columns here and list them in the selection below.

# Keep only the columns that are exported. .copy() detaches the result from df
# so that later in-place edits to new_df (such as recoding country codes with
# .loc) operate on an independent frame and do not raise
# SettingWithCopyWarning.
new_df = df[['year', 'cntry', 'leaning', 'happy', 'satisfaction', 'trust_country', 'trust_eu']].copy()

print(new_df.shape)
print(new_df.head())

(65027, 7)
   year cntry       leaning  happy  satisfaction  trust_country  trust_eu
0  2006    BE         right      9           7.0            5.0         8
1  2006    BE  center right      8           6.0            4.0         6
2  2006    BE        center      8           7.0            4.0         3
3  2006    BE        center      9           7.0            2.0         1
4  2006    BE        center      8           6.0            2.0         5


In [6]:
# Recode the country code 'GR' to 'EL' so those rows line up with the 'EL'
# country key used in combined_data.json.
# new_df was selected out of df, so writing into it with .loc directly raises
# SettingWithCopyWarning; take an explicit copy first so the assignment
# targets an independent frame.
new_df = new_df.copy()
new_df.loc[new_df['cntry'] == 'GR', 'cntry'] = 'EL'

In [7]:
# Load the per-country dictionary that the ESS answers are merged into.
# The encoding is pinned to UTF-8 (the standard JSON encoding) so that any
# non-ASCII text in the file decodes the same way on every platform instead of
# depending on the locale's default encoding.
with open("combined_data.json", encoding="utf-8") as f:
    data = json.load(f)

In [8]:
# Give every country entry a fresh, empty 'ess_data' mapping (any existing
# one is replaced), then show one entry as a sanity check.
for country_entry in data.values():
    country_entry['ess_data'] = {}
print(data['BE'])

{'election_data': {'2019': {'NVA': 16.0, 'VB': 12.0, 'PS - Belgium': 9.5, 'CDV': 8.9, 'PVDA - Belgium': 8.6, 'OVLD': 8.5, 'MR': 7.6, 'SPA': 6.7, 'Ecolo': 6.1, 'Groen': 6.0, 'CDH': 3.7, 'Other parties': 6.4}, '2014': {'NVA': 20.3, 'PS - Belgium': 11.7, 'CDV': 11.6, 'OVLD': 9.8, 'MR': 9.6, 'SPA': 8.8, 'Groen': 5.3, 'CDH': 5.0, 'VB': 3.7, 'Ecolo': 3.3, 'Other parties': 10.9}, '2010': {'NVA': 17.4, 'PS - Belgium': 13.7, 'CDV': 10.9, 'MR': 9.3, 'SPA': 9.2, 'OVLD': 8.6, 'VB': 7.8, 'CDH': 5.5, 'Ecolo': 4.8, 'Groen': 4.4, 'Other parties': 8.4}, '2007': {'CDV/NVA': 18.5, 'MR': 12.5, 'VB': 12.0, 'OVLD': 11.8, 'PS - Belgium': 10.9, 'SPA': 10.3, 'CDH': 6.1, 'Ecolo': 5.1, 'Groen': 4.0, 'Other parties': 8.8}}, 'leaning_data': {'far-left': ['PVDA - Belgium'], 'left': ['PS - Belgium', 'Ecolo', 'Groen'], 'center-left': ['SPA'], 'center': [], 'center-right': ['CDV', 'OVLD', 'MR', 'CDH', 'CDV/NVA'], 'right': ['NVA'], 'far-right': ['VB']}, 'ess_data': {}}


In [9]:
# Copy the survey answers into data[country]['ess_data'][year] as one list per
# exported field.
#
# The frame is grouped once by (country, year) rather than walked row by row
# with iterrows(), which is far slower on a frame of this size. Each group's
# lists are ASSIGNED rather than appended to, so re-running this cell does not
# duplicate answers.
#
# * sort=False keeps the groups in order of first appearance, and rows keep
#   their original order within each group, so the resulting dict/list order
#   matches a row-by-row pass over new_df.
# * .tolist() yields plain Python scalars (int / float / str), which json.dump
#   can serialise.
# * groupby drops rows whose country or year is missing; such rows cannot be
#   filed under a meaningful key.
# * A country code that is not a key of `data` still raises KeyError, so a
#   mismatch between the two data sets is not silently ignored.
ess_fields = ['leaning', 'happy', 'satisfaction', 'trust_country', 'trust_eu']

for (country, year), answers in new_df.groupby(['cntry', 'year'], sort=False):
    data[country]['ess_data'][year] = {
        field: answers[field].tolist() for field in ess_fields
    }

In [10]:
# Report how many answers were stored per country and year (counted via the
# 'happy' list), with a blank line after each country.
for country, country_entry in data.items():
    for year, answers in country_entry['ess_data'].items():
        print(f"{country} {year}: {len(answers['happy'])}")
    print()

AT 2016: 1684
AT 2018: 2018

BE 2006: 1620
BE 2010: 1576
BE 2014: 1647
BE 2018: 1617

DE 2008: 2246
DE 2012: 2560
DE 2016: 2605
DE 2020: 7575

DK 2010: 1327
DK 2014: 1316
DK 2018: 1326

EL 2020: 2250

ES 2010: 1525
ES 2014: 1408
ES 2018: 1239

FI 2010: 1695
FI 2014: 1893
FI 2018: 1597

FR 2006: 1758
FR 2012: 1789
FR 2016: 1833

IE 2006: 1275
IE 2010: 1854
IE 2016: 1966
IE 2020: 1290

IT 2012: 708
IT 2018: 1814


NL 2012: 1595
NL 2016: 1402
NL 2020: 1263

PT 2014: 1006
PT 2018: 830

SE 2010: 1164
SE 2014: 1441
SE 2018: 1315



In [12]:
# Serialise the enriched per-country dictionary to ess_data.json using a
# 3-space indent.
with open('ess_data.json', 'w') as out_file:
    out_file.write(json.dumps(data, indent=3))