# SwissVotes - Analysis

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
import re

## Data cleaning

In [None]:
# Load the raw export (semicolon-separated CSV) and preview the first rows.
df_full = pd.read_csv('swiss-votes.csv', sep=';')
df_full.head()

In [None]:
# Build the working frame from the raw columns, one keyword per output column.
# Columns are added in keyword order, so `date_year` can be derived from the
# `date` column created just before it.
df = pd.DataFrame().assign(
    # Title of the vote in French.
    title_fr=df_full['titel_off_f'],
    # Raw dates are parsed day-first.
    date=pd.to_datetime(df_full['datum'], dayfirst=True),
    date_year=lambda frame: frame['date'].dt.year,
    # Numeric legal-form code -> readable label; unmapped codes become NaN.
    # NOTE(review): 'AA' and 'BB' look like placeholder labels for codes 4
    # and 5 -- confirm their meaning against the dataset codebook.
    type=df_full['rechtsform'].map({
        1: 'mandatory-referendum',
        2: 'optional-referendum',
        3: 'popular-initiative',
        4: 'AA',
        5: 'BB'
    }),
    author=df_full['urheber'],
    valid_signatures=df_full['unter_g'],
)
df

### Political parties

In [None]:
def has_svp(authors): # SVP (known in French as UDC)
    """Return True if the author text names the SVP.

    The match is case-insensitive and succeeds when the text contains the
    full name "Schweizerische Volkspartei" or the standalone word "SVP".

    Args:
        authors: Free-text author field of a vote. Non-string values
            (e.g. NaN for an empty CSV cell, or None) are treated as
            "no author" instead of raising.

    Returns:
        bool: whether the SVP is mentioned.
    """
    if not isinstance(authors, str):
        # Missing cells are read as float NaN, which has no .lower().
        return False
    text = authors.lower()
    return "schweizerische volkspartei" in text or re.search(r'\bsvp\b', text) is not None

def has_sp(authors): # SP (known in French as PS, Parti Socialiste)
    """Return True if the author text names the SP.

    The match is case-insensitive and succeeds when the text contains
    "Sozialdemokratischen Partei" or the standalone word "SP".

    Args:
        authors: Free-text author field of a vote. Non-string values
            (e.g. NaN for an empty CSV cell, or None) are treated as
            "no author" instead of raising.

    Returns:
        bool: whether the SP is mentioned.
    """
    if not isinstance(authors, str):
        # Missing cells are read as float NaN, which has no .lower().
        return False
    text = authors.lower()
    return "sozialdemokratischen partei" in text or re.search(r'\bsp\b', text) is not None

def has_mitte(authors): # Centrist parties (CVP, BDP)
    """Return True if the author text names a centrist party (CVP or BDP).

    The match is case-insensitive and succeeds when the text contains the
    CVP's full name ("Christlichdemokratische(n) Volkspartei") or the
    standalone abbreviations "CVP" or "BDP".

    The previous version tested for "sozialdemokratischen partei" (the SP's
    name, copied from has_sp), which flagged every SP-authored vote as
    centrist; that check is replaced by the CVP's own name.

    NOTE(review): other spellings of centrist parties (e.g. a merged
    successor party name) are not matched -- extend the pattern if the data
    contains them.

    Args:
        authors: Free-text author field of a vote. Non-string values
            (e.g. NaN for an empty CSV cell, or None) are treated as
            "no author" instead of raising.

    Returns:
        bool: whether a centrist party is mentioned.
    """
    if not isinstance(authors, str):
        # Missing cells are read as float NaN, which has no .lower().
        return False
    text = authors.lower()
    # "n?" accepts both the nominative and the declined form of the name.
    return re.search(r'christlichdemokratischen? volkspartei|\b(cvp|bdp)\b', text) is not None

def has_fdp(authors): # FDP (liberal-democratic party)
    """Return True if the author text contains the standalone word "FDP".

    The match is case-insensitive.

    Args:
        authors: Free-text author field of a vote. Non-string values
            (e.g. NaN for an empty CSV cell, or None) are treated as
            "no author" instead of raising.

    Returns:
        bool: whether the FDP is mentioned.
    """
    if not isinstance(authors, str):
        # Missing cells are read as float NaN, which has no .lower().
        return False
    return re.search(r'\bfdp\b', authors.lower()) is not None

# One boolean flag column per party: apply each detector to the author text.
# Targets and detectors are listed in the same order.
(
    df['author_partei_svp'],
    df['author_partei_sp'],
    df['author_partei_mitte'],
    df['author_partei_fdp'],
) = (
    df['author'].map(detector)
    for detector in (has_svp, has_sp, has_mitte, has_fdp)
)

In [None]:
# Number of votes flagged for each party, in the order SVP, SP, Mitte, FDP.
tuple(
    sum(df[flag_column])
    for flag_column in (
        'author_partei_svp',
        'author_partei_sp',
        'author_partei_mitte',
        'author_partei_fdp',
    )
)

## Analysis

In [None]:
# Histogram of vote years.
# NOTE(review): the bin count is (year span) // 10, so each bar covers roughly
# ten years rather than a single year -- confirm the title matches the intent.
plt.figure(figsize=(10, 6))
sns.histplot(
    df['date_year'],
    bins=(df['date_year'].max() - df['date_year'].min()) // 10,
).set(
    title='Number of popular votes per year',
    xlabel='Year',
    ylabel='Number of votes',
)
plt.show()

In [None]:
# Stacked, normalised histogram: share of each vote type over the years.
sns.displot(
    data=df,
    x='date_year',
    hue='type',
    kind='hist',
    multiple='fill',
    height=6,
    aspect=2,
)
# Label the current axes and clip the x-range to the years present in the data.
plt.gca().set(
    title="Proportion of the referendum used over the years",
    xlabel="Year",
    ylabel="Proportion",
    xlim=(df['date_year'].min(), df['date_year'].max()),
)
plt.show()