# Elections PEI - 2019 Provincial Election

We scrape data from the [Elections PEI](https://www.electionspei.ca/2019-election-results) website and shape it into simple datasets for use in our web app's visualizations and in future analysis.

In [1]:
# Dependencies.
import pandas as pd

In [2]:
# Fetch the results page and parse every HTML table on it into a DataFrame.
results_url = 'https://www.electionspei.ca/2019-election-results'
web = pd.read_html(io=results_url)

In [3]:
# pd.read_html returned a list of tables; the one we want is the first entry.
df = web[0]

# Preview the top ten rows to see how the scraped table is laid out.
df.head(n=10)

Unnamed: 0,0,1,2,3,4,5
0,267 polls reporting,Green,Ind,Liberal,NDP,PC
1,Popular Vote,"25, 302",282,24346,2454,30415
2,Percent,30.6%,0.3%,29.4%,3.0%,36.7%
3,Leading,8,0,6,0,13
4,Districts,,,,,
5,1,804,-,861,-,1347
6,2,865,-,663,49,1493
7,3,675,-,785,124,1373
8,4,781,-,615,-,1545
9,5,1152,-,902,38,934


In [4]:
# Reshape the scraped table into a party-by-district grid of vote counts.
#
# Build from web[0] rather than from the running `df`: the original form
# (`df = df.drop(...)`) overwrote its own input, so re-running this cell
# raised a KeyError because rows 1-4 / column 0 no longer existed.
raw_results = web[0]

# Row 0 is the header row holding the party names. Its first cell is a
# "<n> polls reporting" label; read it from the table instead of hard-coding
# the text, so the reshape does not depend on the exact poll count.
header_label = raw_results.loc[0, 0]

df = (
    raw_results
    # Rows 1-4 are 'Popular Vote', 'Percent', 'Leading' and the 'Districts'
    # separator row; only the header row and per-district rows are kept.
    .drop(index=[1, 2, 3, 4])
    # Column 0 carries the header label and the district numbers.
    .set_index(0)
    # After transposing, each row is a party and each column a district.
    .T
    # The former header row is now a column of party names; use it as index.
    .set_index(header_label)
)
df.index.name = None

df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,18,19,20,21,22,23,24,25,26,27
Green,804,865,675,781,1152,805,697,747,709,1057,...,899,1041,805,1258,1302,1101,761,231,317,584
Ind,-,-,-,-,-,-,-,-,-,-,...,-,54,-,-,-,-,-,-,-,-
Liberal,861,663,785,615,902,882,557,1196,635,1420,...,489,417,389,892,938,882,1100,1102,1153,1388
NDP,-,49,124,-,38,31,35,46,46,41,...,30,32,31,39,65,81,33,898,99,44
PC,1347,1493,1373,1545,934,1270,1752,1300,1080,865,...,1920,1680,2008,1037,662,1026,575,462,1312,802


In [5]:
# Replace missing values '-' with 0, and convert to numeric.
#
# DataFrame.replace matches whole cell values, so only cells that are exactly
# '-' are rewritten (a substring replace would also alter any value that merely
# contains a hyphen). Unlike the `.str` accessor, this form is also safe to
# re-run: on an already-numeric frame both steps are no-ops instead of raising
# AttributeError.
df = df.replace('-', '0').apply(pd.to_numeric)

df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,18,19,20,21,22,23,24,25,26,27
Green,804,865,675,781,1152,805,697,747,709,1057,...,899,1041,805,1258,1302,1101,761,231,317,584
Ind,0,0,0,0,0,0,0,0,0,0,...,0,54,0,0,0,0,0,0,0,0
Liberal,861,663,785,615,902,882,557,1196,635,1420,...,489,417,389,892,938,882,1100,1102,1153,1388
NDP,0,49,124,0,38,31,35,46,46,41,...,30,32,31,39,65,81,33,898,99,44
PC,1347,1493,1373,1545,934,1270,1752,1300,1080,865,...,1920,1680,2008,1037,662,1026,575,462,1312,802


In [6]:
# Write the counts out in both orientations:
#   02a - one row per party, one column per district
#   02b - one row per district, one column per party (the transpose)
counts_by_party_csv = 'elections_transformed/02a_2019_provincial_election_district_counts.csv'
counts_by_district_csv = 'elections_transformed/02b_2019_provincial_election_district_counts_by_dist.csv'

df.to_csv(counts_by_party_csv)
df.transpose().to_csv(counts_by_district_csv)