We scrape the 2019 provincial election results from the Elections PEI website and shape them into simple datasets for use in our web app's visualizations and in future analysis.

In [1]:
# Dependencies.
from pathlib import Path

import pandas as pd

In [2]:
# Fetch every HTML table on the results page; read_html returns them as a list of DataFrames.
results_url = 'https://www.electionspei.ca/2019-election-results'
web = pd.read_html(results_url)

In [3]:
# The results table we want is the first one pandas parsed from the page.
df = web[0]
# Preview the first ten rows (header row, three summary rows, then the first districts).
df.iloc[:10]

Unnamed: 0,0,1,2,3,4,5
0,267 polls reporting,Green,Ind,Liberal,NDP,PC
1,Popular Vote,"25, 302",282,24346,2454,30415
2,Percent,30.6%,0.3%,29.4%,3.0%,36.7%
3,Leading,8,0,6,0,13
4,Districts,,,,,
5,1,804,-,861,-,1347
6,2,865,-,663,49,1493
7,3,675,-,785,124,1373
8,4,781,-,615,-,1545
9,5,1152,-,902,38,934


In [4]:
# Separate and sort data.
# The first four rows hold the party header row plus the three province-wide
# summary rows (popular vote, percent, leading); district rows come after.
summary_rows = df.head(4).copy()

# Use the first column as row labels, then transpose so that each party
# becomes a row and each summary measure becomes a column.
df_summary = summary_rows.set_index(0).T

# The top-left cell of the scraped table is a live poll count (e.g.
# "267 polls reporting"), so look the label up instead of hard-coding it;
# after the transpose it is the first column, and it holds the party names.
party_col = df_summary.columns[0]
df_summary = df_summary.set_index(party_col)
df_summary.index.name = None

# Remove extra space between Popular and Vote (collapse any run of whitespace).
df_summary.columns = df_summary.columns.str.replace(r'\s+', ' ', regex=True).str.strip()

# Fix the malformed Popular Vote text (e.g. "25, 302") by stripping thousands
# separators and stray whitespace from every value, rather than overwriting
# one party's cell with a hard-coded number.
df_summary['Popular Vote'] = (
    df_summary['Popular Vote'].astype(str).str.replace(r'[,\s]', '', regex=True)
)

df_summary

Unnamed: 0,Popular Vote,Percent,Leading
Green,25302,30.6%,8
Ind,282,0.3%,0
Liberal,24346,29.4%,6
NDP,2454,3.0%,0
PC,30415,36.7%,13


In [5]:
# Convert datatypes for columns.
# Vote and seat counts are whole numbers.
for count_col in ('Popular Vote', 'Leading'):
    df_summary[count_col] = pd.to_numeric(df_summary[count_col])

# Percent arrives as text such as '30.6%'. Casting to str before stripping the
# sign keeps this cell safe to re-run: once the column is already float, a bare
# .str accessor would raise AttributeError.
df_summary['Percent'] = pd.to_numeric(df_summary['Percent'].astype(str).str.rstrip('%'))

In [6]:
# Check data types: after the conversion cell, the two count columns should be
# integers and Percent should be a float.
df_summary.dtypes

0
Popular Vote      int64
Percent         float64
Leading           int64
dtype: object

In [7]:
# Save table.
# Create the output folder first so a fresh checkout / fresh-kernel run does
# not fail because 'elections_transformed/' is missing.
summary_csv = Path('elections_transformed/01_2019_provincial_election_summary.csv')
summary_csv.parent.mkdir(parents=True, exist_ok=True)
df_summary.to_csv(summary_csv)
df_summary

Unnamed: 0,Popular Vote,Percent,Leading
Green,25302,30.6,8
Ind,282,0.3,0
Liberal,24346,29.4,6
NDP,2454,3.0,0
PC,30415,36.7,13
