# SwissVotes - Analysis

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
import re

## Data cleaning

In [None]:
# Load the semicolon-separated export and preview its first rows
df_full = pd.read_csv('swiss-votes.csv', sep=';')
df_full.head()

In [None]:
# Readable labels for the numeric `rechtsform` codes.
# NOTE(review): 'AA' and 'BB' look like placeholders for codes 4 and 5 —
# confirm their meaning against the dataset codebook before publishing plots.
LEGAL_FORM_LABELS = {
    1: 'mandatory-referendum',
    2: 'optional-referendum',
    3: 'popular-initiative',
    4: 'AA',
    5: 'BB'
}

# Dates are written day-first in the source file
vote_dates = pd.to_datetime(df_full['datum'], dayfirst=True)

# Working frame: only the columns used in this notebook, under English names
df = pd.DataFrame({
    'title_fr': df_full['titel_off_f'],  # Title of the vote in French
    'date': vote_dates,
    'date_year': vote_dates.dt.year,
    'type': df_full['rechtsform'].map(LEGAL_FORM_LABELS),
    'author': df_full['urheber'],
    'author_fr': df_full['urheber-fr'],
    'valid_signatures': df_full['unter_g'],
})

df

### Political parties

In [None]:
def has_svp(authors): # Swiss People's Party (French acronym UDC: Union Democrate du Centre)
    """Return True if `authors` names the SVP, by its full German name or the standalone acronym."""
    text = authors.lower()
    if "schweizerische volkspartei" in text:
        return True
    # Word boundaries keep the acronym from matching inside a longer token
    return bool(re.search(r'\bsvp\b', text))

def has_sp(authors): # Social Democratic Party (French acronym PS: Parti Socialiste)
    """Return True if `authors` names the SP, by one of its German full-name forms or the standalone acronym."""
    text = authors.lower()
    full_names = ("sozialdemokratischen partei", "sozialdemokratische partei")
    if any(name in text for name in full_names):
        return True
    # Word boundaries keep the acronym from matching inside a longer token
    return bool(re.search(r'\bsp\b', text))

def has_mitte(authors): # Centrist parties
    """Return True if `authors` contains the standalone acronym CVP or BDP (case-insensitive)."""
    match = re.search(r'\b(cvp|bdp)\b', authors.lower())
    return match is not None

def has_fdp(authors): # Free Democratic Party (liberals)
    """Return True if `authors` contains the standalone acronym FDP (case-insensitive)."""
    return bool(re.search(r'\bfdp\b', authors.lower()))

def has_grun(authors): # Green parties
    """Return True if `authors` contains the standalone word "grün" or one of its -e/-en/-es forms."""
    # Fold the umlaut so the ASCII-only pattern below also matches "grün..."
    normalised = authors.lower().replace('ü', 'u')
    found = re.search(r'\bgrun(e|en|es)?\b', normalised)
    return found is not None

def has_uss(authors): # Swiss Trade Union Federation (French acronym USS: Union syndicale suisse)
    """Return True if `authors` contains "sgb" anywhere or the federation's full German name."""
    text = authors.lower()
    # Plain substring test: the acronym is matched even inside a longer token
    if 'sgb' in text:
        return True
    # The pattern accepts the -n / -r endings and a missing "k" in "Gewerkschaftsbund"
    return re.search(r'schweizerische(n|r) gewerk?schaftsbund', text) is not None

# One boolean column per organisation: does it appear in the author string?
AUTHOR_DETECTORS = {
    'svp': has_svp,
    'sp': has_sp,
    'mitte': has_mitte,
    'fdp': has_fdp,
    'grun': has_grun,
    'uss': has_uss,
}

for party_key, detector in AUTHOR_DETECTORS.items():
    df[f'author_{party_key}'] = df['author'].map(detector)

# True when at least one of the five parties is among the authors
# (`author_uss` is not part of this flag)
df['author_partei'] = (
    df['author_svp']
    | df['author_sp']
    | df['author_mitte']
    | df['author_fdp']
    | df['author_grun']
)

# English display names of the organisations tracked in this notebook.
# Each key is the suffix of the matching boolean column `author_<key>`.
FULL_NAMES = {
    'svp': "Swiss People's Party",
    'sp': "Social Democratic Party",
    'mitte': "The Centre",
    'fdp': "Free Democratic Party",
    'grun': "Green Party of Switzerland",
    'uss': "Swiss Trade Union Federation"
}

No author is provided for mandatory referendums (since the referendum is not initiated by a specific group of people). A small number of votes of other types are also missing this information.

In [None]:
# Missing data: votes that are not mandatory referendums but whose author is the '.' marker
df[(df['type'] != 'mandatory-referendum') & (df['author'] == '.')]

In [None]:
# Subset of the votes that are popular initiatives
df_initiatives = df.query("type == 'popular-initiative'")

# Share of initiatives with at least one party among the authors.
# The mean of a boolean column is the fraction of True values; it is formatted as a
# real percentage so the number agrees with its label, and an empty subset yields
# NaN instead of a division-by-zero error.
party_share = df_initiatives['author_partei'].mean() if len(df_initiatives) else float('nan')
print(f"Percentage of popular initiatives from a political party: {party_share:.1%}")

In [None]:
print("Number of initiatives per party:")

# One line per organisation: display name followed by the number of flagged initiatives
for party_key, party_name in FULL_NAMES.items():
    print(party_name, int(df_initiatives[f'author_{party_key}'].sum()))

In [None]:
def plot():
    """Draw a horizontal bar chart of the number of initiatives per organisation.

    Reads `df_initiatives` and `FULL_NAMES` from the notebook namespace, saves the
    figure to 'output/total-initiatives-by-party.png' and shows it.
    NOTE: the name `plot` is redefined by a later cell of this notebook.
    """
    # Bars are drawn in this fixed order
    party_order = ['sp', 'uss', 'svp', 'grun', 'fdp', 'mitte']
    df_data = pd.DataFrame([
        {'party': FULL_NAMES[key], 'count': sum(df_initiatives[f'author_{key}'])}
        for key in party_order
    ])

    ax = sns.barplot(df_data, y="party", x="count")
    ax.set_xlabel('Number of initiatives')
    ax.set_ylabel(None)
    plt.savefig('output/total-initiatives-by-party.png', bbox_inches='tight')
    plt.show()

plot()

In [None]:
# Sanity check: list authors that mention "partei" but matched none of the party detectors
for author, from_party in zip(df_initiatives['author'], df_initiatives['author_partei']):
    if 'partei' in author.lower() and not from_party:
        print(author)

In [None]:
from collections import Counter
from matplotlib_venn import venn3

def plot_join_initiatives(ids, columns, colors, output_file):
    """Draw a three-set Venn diagram of initiatives launched jointly by three organisations.

    Parameters
    ----------
    ids : sequence of three keys of `FULL_NAMES`; used for the legend labels.
    columns : sequence of three boolean column names of `df_initiatives`,
        in the same order as `ids`.
    colors : sequence of three matplotlib colours, one per set.
    output_file : path the figure is saved to before it is shown.
    """
    from matplotlib.patches import Patch

    col_a, col_b, col_c = columns[0], columns[1], columns[2]

    # Count each membership pattern (in_a, in_b, in_c); initiatives launched by
    # none of the three organisations are left out.
    memberships = zip(
        df_initiatives[col_a].astype(bool),
        df_initiatives[col_b].astype(bool),
        df_initiatives[col_c].astype(bool),
    )
    counter = Counter(pattern for pattern in memberships if any(pattern))

    # Region ids in the order venn3 expects its `subsets`: Abc, aBc, ABc, abC, AbC, aBC, ABC
    region_ids = ('100', '010', '110', '001', '101', '011', '111')
    subsets = tuple(
        counter[tuple(flag == '1' for flag in region)] for region in region_ids
    )

    plt.figure()
    ax = plt.gca()

    v = venn3(
        subsets=subsets,
        set_labels=None,
        ax=ax,
        set_colors=colors
    )

    # A region without a patch is skipped explicitly instead of hiding every
    # possible error behind a bare `except`.
    # NOTE(review): assumes get_patch_by_id returns None for a region that is not drawn — confirm.
    for region in region_ids:
        patch = v.get_patch_by_id(region)
        if patch is not None:
            patch.set_alpha(0.7)

    # Legend handles: the exclusive region of each set, or a proxy patch of the
    # same colour when that region is not drawn, so every label keeps a swatch.
    handles = []
    for region, color in zip(('100', '010', '001'), colors):
        patch = v.get_patch_by_id(region)
        if patch is None:
            patch = Patch(facecolor=color, edgecolor='none', alpha=0.7)
        handles.append(patch)
    labels = [FULL_NAMES[key] for key in ids]
    ax.legend(handles=handles, labels=labels)

    plt.savefig(output_file, bbox_inches='tight')
    plt.show()

# Joint initiatives between the SP, the Greens and the SVP
plot_join_initiatives(
    ids=['sp', 'grun', 'svp'],
    columns=('author_sp', 'author_grun', 'author_svp'),
    colors=('red', 'green', 'blue'),
    output_file='output/joint-initiatives-parties.png',
)
# Joint initiatives between the SP, the Greens and the trade union federation
plot_join_initiatives(
    ids=['sp', 'grun', 'uss'],
    columns=('author_sp', 'author_grun', 'author_uss'),
    colors=('red', 'green', 'purple'),
    output_file='output/joint-initiatives-uss.png',
)

In [None]:
# One histogram per party: number of its popular initiatives per decade.
# `binwidth=10` matches the other decade histograms of this notebook and, unlike
# `bins=(max - min) // 10`, cannot evaluate to zero bins when a party's
# initiatives span less than ten years.
for party_key in ['sp', 'grun', 'svp']:
    df_party = df_initiatives.query(f"author_{party_key} == 1")
    plt.figure(figsize=(10, 6))
    sns.histplot(df_party['date_year'], binwidth=10)
    # The bars are decades of initiatives, so the title says so
    plt.title(f"Number of popular initiatives per decade for {FULL_NAMES[party_key]}")
    plt.xlabel('Year')
    plt.ylabel('Number of votes')
    plt.show()

In [None]:
def plot():
    """Draw a grouped histogram of popular initiatives per decade for three parties.

    Reads `df_initiatives` from the notebook namespace. An initiative flagged for
    several of the parties contributes one row per party.
    """
    # Long format: one (year, party) record per initiative and flagged party
    rows = [
        {'year': year, 'party': key}
        for key in ['sp', 'svp', 'grun']
        for year in df_initiatives.query(f"author_{key} == 1")['date_year']
    ]
    df_initiatives_parties = pd.DataFrame(rows, columns=['year', 'party'])

    plt.figure(figsize=(10, 6))
    sns.histplot(df_initiatives_parties, x='year', hue='party', multiple="dodge", binwidth=10)

    plt.title("Number of popular initiatives per decade by party")
    plt.xlabel('Year')
    plt.ylabel('Number of votes')
    plt.show()

plot()

## Analysis

In [None]:
# All votes, counted in ten-year bins
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['date_year'], binwidth=10, ax=ax)
ax.set_title('Number of popular votes per decade')
ax.set_xlabel('Year')
ax.set_ylabel('Number of votes')
plt.show()

In [None]:
# Popular initiatives only, counted in ten-year bins
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df_initiatives['date_year'], binwidth=10, ax=ax)
ax.set_title('Number of popular initiatives per decade')
ax.set_xlabel('Year')
ax.set_ylabel('Number of votes')
plt.show()

In [None]:
# Stacked histogram normalised per bin: share of each vote type over time
grid = sns.displot(df, x='date_year', hue='type', kind='hist', multiple='fill', aspect=2, height=6)
first_year, last_year = df['date_year'].min(), df['date_year'].max()

# No faceting variable is used, so the grid exposes its single axes as `grid.ax`
grid.ax.set_title("Proportion of the referendum used over the years")
grid.ax.set_xlabel("Year")
grid.ax.set_xlim(first_year, last_year)
grid.ax.set_ylabel("Proportion")
plt.show()

## Dimension reduction

In [None]:
from tqdm import tqdm

# Minimum number of code-1/code-2 recommendations an entity needs to be kept
MIN_RECOMMENDATIONS = 200

# Recommendation columns start with 'p-'; the 'p-others...' columns are skipped
candidate_columns = [
    c for c in df_full.columns
    if c.startswith('p-') and not c.startswith('p-others')
]

# Compare on the string form of every cell, exactly as `str(value) == '1'` does.
# NOTE(review): presumably code '1' is a positive and '2' a negative recommendation
# (hence the 'p'/'n' keys below) — confirm against the dataset codebook.
recommendations = pd.DataFrame(
    {c: df_full[c].map(str) for c in candidate_columns},
    index=df_full.index,
)
is_pos = recommendations == '1'
is_neg = recommendations == '2'

# Filter out inactive entities
n_recommendations = (is_pos | is_neg).sum()
indices = [c for c in candidate_columns if n_recommendations[c] >= MIN_RECOMMENDATIONS]

# 0/1 matrices of shape (number of votes, number of kept entities)
pos = is_pos[indices].to_numpy(dtype=np.int64)
neg = is_neg[indices].to_numpy(dtype=np.int64)

# Pairwise co-occurrence counts through matrix products instead of looping over
# every row for every pair of entities. Entry [i1, i2] counts the votes where
# entity i1 gave the first code and entity i2 the second.
count_pp = pos.T @ pos
count_pn = pos.T @ neg
count_np = neg.T @ pos
count_nn = neg.T @ neg

# Same nested-list-of-dicts layout as before, with plain Python ints so that
# get_dist still raises ZeroDivisionError for a pair without any common vote.
votes = [
    [
        {
            'pp': int(count_pp[i1, i2]),
            'pn': int(count_pn[i1, i2]),
            'np': int(count_np[i1, i2]),
            'nn': int(count_nn[i1, i2]),
        }
        for i2 in range(len(indices))
    ]
    for i1 in range(len(indices))
]

def get_dist(votes):
    """Return the share of disagreements ('pn' + 'np') among the four counts of one pair.

    `votes` is one of the dicts built above, with keys 'pp', 'pn', 'np' and 'nn'.
    Raises ZeroDivisionError when all four counts are zero.
    """
    total = votes['pp'] + votes['nn'] + votes['np'] + votes['pn']
    return (votes['pn'] + votes['np']) / total

# Square matrix of pairwise disagreement rates, as a list of lists
dists = [[get_dist(votes[i][j]) for i, _ in enumerate(votes)] for j, _ in enumerate(votes)]

In [None]:
# English display names for the recommendation columns ('p-<abbreviation>') of df_full.
# The trailing comments give the German name of each organisation.
# Every column kept in `indices` needs an entry here: the heatmap cell looks names up by column.
entity_names = {
    'p-fdp': "Free Democratic Party", # Freisinnig-demokratische Partei
    'p-sps': "Social Democratic Party", # Sozialdemokratische Partei
    'p-svp': "Swiss People's Party", # Schweizerische Volkspartei
    'p-evp': "Evangelical People's Party", # Evangelische Volkspartei
    'p-gps': "Green Party", # Grüne Partei der Schweiz
    'p-pda': "Swiss Party of Labour", # Partei der Arbeit
    'p-sd': "Swiss Democrats", # Schweizer Demokraten
    'p-edu': "Federal Democratic Union", # Eidgenössisch-Demokratische Union
    'p-fps': "Automobile Party", # Autopartei
    'p-cvp': "Christian Democratic People's Party", # Christlichdemokratische Volkspartei
    'p-lps': "Liberal Party", # Liberale Partei der Schweiz
    'p-ldu': "Alliance of Independents", # Landesring der Unabhängigen
    'p-eco': "Economiesuisse",
    'p-sgv': "Swiss Union of Arts and Crafts", # Schweizerischer Gewerbeverband
    'p-sbv': "Swiss Farmers' Union", # Schweizer Bauernverband
    'p-sgb': "Swiss Trade Union Federation", # Schweizerischer Gewerkschaftsbund
    'p-travs': "Travail.Suisse",
    'p-sav': "Swiss Employers Association" # Schweizerischer Arbeitgeberverband
}

In [None]:
# Agreement between sps and svp
# `list.index` raises ValueError when a column was filtered out, instead of leaving
# `i_svp` / `i_sps` undefined or stale from an earlier run of this cell.
i_svp = indices.index('p-svp')
i_sps = indices.index('p-sps')

votes[i_sps][i_svp]

In [None]:
# 1D
from sklearn.decomposition import PCA

# Project each entity onto the first principal component of the distance matrix,
# with the sign of the axis flipped
pca = PCA(n_components=1)
data = -pca.fit_transform(dists)

# Entity positions ordered by increasing coordinate; `sorted` is stable, so equal
# coordinates keep their original relative order
perm = sorted(range(len(indices)), key=lambda position: data[position, 0])

In [None]:
# Display names in the order given by the 1-D projection
ordered_names = [entity_names[indices[position]] for position in perm]

# One record per entity: its name plus its agreement rate (1 - distance) with every entity
rows = [
    {
        'name': row_name,
        **{
            col_name: 1 - dists[row_pos][col_pos]
            for col_pos, col_name in zip(perm, ordered_names)
        },
    }
    for row_pos, row_name in zip(perm, ordered_names)
]

data = pd.DataFrame(rows).set_index('name')

heatmap = sns.heatmap(data)
heatmap.set_xlabel(None)
heatmap.set_ylabel(None)
plt.setp(heatmap.get_xticklabels(), rotation=45, horizontalalignment='right')
plt.savefig('output/heatmap-votes.png', bbox_inches='tight')
plt.show()

In [None]:
from sklearn.manifold import TSNE

# 2-D t-SNE embedding computed from the precomputed pairwise distance matrix
tsne = TSNE(n_components=2, metric='precomputed', perplexity=5, init="random", random_state=42)
data = tsne.fit_transform(np.array(dists))

plt.scatter(data[:, 0], data[:, 1])

# Label every point with its column name minus the 'p-' prefix
for i, label in enumerate(indices):
    plt.annotate(label[2:], (data[i, 0], data[i, 1]))

# This embedding comes from t-SNE, not PCA
plt.title('t-SNE Visualization')
plt.xlabel('Component 1')
plt.ylabel('Component 2')
plt.show()

In [None]:
from sklearn.decomposition import PCA

# First two principal components of the distance matrix
pca = PCA(n_components=2)
data = pca.fit_transform(dists)

# Flip the sign of the first axis
data[:, 0] *= -1

xs, ys = data[:, 0], data[:, 1]
plt.scatter(xs, ys)

# Label every point with its column name
for label, x, y in zip(indices, xs, ys):
    plt.annotate(label, (x, y))

plt.title('PCA Visualization')
plt.xlabel('Component 1')
plt.ylabel('Component 2')
plt.show()