# First-past-the-post election

### Using the existing Canadian electoral system, calculate the outcome of the federal election

## Setup

In [1]:
# Imports
import math
import numpy as np
import pandas as pd

In [2]:
# Load the election results table from the CSV file in the data directory

election_data = pd.read_csv(filepath_or_buffer='../data/2019-canada-federal-results.csv')

In [3]:
# Generate metadata: the distinct riding, province and party identifiers.
# Each column is read directly rather than walking the whole table row by row
# with iterrows() three times. The results stay plain sets because later cells
# iterate over them and convert them to lists.

ridings = set(election_data['Riding ID'].unique())
provinces = set(election_data['Province ID'].unique())
parties = set(election_data['Party ID'].unique())

In [4]:
# Sanity check: the summed 'Votes' column should equal the official count
# 2019 Federal Total Valid Votes = 18,170,880 (from elections.ca)

election_data.loc[:, 'Votes'].sum()

18170880

## Declare winners in each riding

In [5]:
# Start with every candidate flagged as a non-winner (boolean column, all False)

election_data['Winner'] = np.zeros(len(election_data), dtype=bool)

In [6]:
# For each riding, set the winner

# Find the row label of the top vote-getter in every riding in one grouped pass,
# instead of re-filtering the whole table once per riding. idxmax returns the
# first row on a tie, which matches the per-riding idxmax used previously.
winner_indices = election_data.groupby('Riding ID')['Votes'].idxmax()

# Set the winner column to True for those candidates
election_data.loc[winner_indices, 'Winner'] = True

In [7]:
# Keep only the rows whose 'Winner' flag is True

winners = election_data[election_data['Winner'].eq(True)]

In [8]:
# The number of rows in the winners table should be 338 (one per riding)

winners.shape[0]

338

## Summarize the results by party

In [9]:
# Create a table of the results by party
#
# Built from grouped aggregations so 'Seats' and 'Votes' are real integer columns.
# Filling an empty frame cell by cell left every column as object dtype and
# re-filtered the full table once per party.

# Total votes received by each party (groupby sorts the party IDs)
votes_by_party = election_data.groupby('Party ID')['Votes'].sum()

# Seats won by each party; parties with no winning candidate get 0
seats_by_party = (
    winners['Party ID']
    .value_counts()
    .reindex(votes_by_party.index, fill_value=0)
)

# The '%' columns are NaN placeholders to keep the column order; a later cell fills them.
# rename_axis(None) drops the 'Party ID' index name so the table keeps an unnamed index.
party_results = pd.DataFrame({
    'Seats': seats_by_party,
    'Seats %': np.nan,
    'Votes': votes_by_party,
    'Votes %': np.nan,
}).rename_axis(None)

In [10]:
# Express each party's seats and votes as a percentage of the respective column total

party_results['Seats %'] = party_results['Seats'].div(party_results['Seats'].sum()).mul(100)
party_results['Votes %'] = party_results['Votes'].div(party_results['Votes'].sum()).mul(100)

In [11]:
# Show parties that won a seat or took more than 1% of the vote, most seats first
# 2019 Federal = LPC 157 | CPC 121 | BQ 32 | NDP 24 | GRE 3 | IND 1

party_results[
    party_results['Votes %'].gt(1) | party_results['Seats'].gt(0)
].sort_values(by='Seats', ascending=False)

Unnamed: 0,Seats,Seats %,Votes,Votes %
LPC,157,46.449704,6018728,33.122931
CPC,121,35.798817,6239227,34.336405
BQ,32,9.467456,1387030,7.633257
NDP,24,7.100592,2903722,15.980085
GRE,3,0.887574,1189607,6.546777
IND,1,0.295858,74291,0.408846
PPC,0,0.0,294092,1.61848


## Calculate the Gallagher Index
A measure of electoral disproportionality between the share of seats won and the share of votes received. (https://en.wikipedia.org/wiki/Gallagher_index)

In [12]:
# Per-party terms of the Gallagher index: the gap between seat share and vote share
# (both in percent), and that gap squared. The index itself is the square root of
# half the sum of the squared gaps.

party_results['Diff'] = party_results['Seats %'].sub(party_results['Votes %'])
party_results['Diff**2'] = party_results['Diff'].pow(2)

In [13]:
# Gallagher index = sqrt(half the sum of the squared seat-share/vote-share gaps)

half_sum_of_squares = party_results['Diff**2'].sum() / 2
gallagher_index = math.sqrt(half_sum_of_squares)
print('Gallagher index is', round(gallagher_index, 2))

Gallagher index is 12.18
