# Elections PEI - 2019 Provincial Election

Using data scraped from the [Elections PEI](https://www.electionspei.ca/2019-election-results) website, we build simple datasets for use in our web app's visualizations and in future analysis.

In [1]:
# Dependencies.
import pandas as pd
import pickle

In [2]:
# Load the district dictionary that was pickled in 03.
# NOTE: pickle.load can execute arbitrary code; only use it on files you created yourself.
with open('elections_transformed/03_district_dict.pkl', 'rb') as pkl_in:
    ddict = pickle.load(pkl_in)

In [3]:
# Load the district counts table written in 02; the first CSV column becomes the index.
counts_csv = 'elections_transformed/02a_2019_provincial_election_district_counts.csv'
df_counts = pd.read_csv(counts_csv, index_col=0)
df_counts

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,18,19,20,21,22,23,24,25,26,27
Green,804,865,675,781,1152,805,697,747,709,1057,...,899,1041,805,1258,1302,1101,761,231,317,584
Ind,0,0,0,0,0,0,0,0,0,0,...,0,54,0,0,0,0,0,0,0,0
Liberal,861,663,785,615,902,882,557,1196,635,1420,...,489,417,389,892,938,882,1100,1102,1153,1388
NDP,0,49,124,0,38,31,35,46,46,41,...,30,32,31,39,65,81,33,898,99,44
PC,1347,1493,1373,1545,934,1270,1752,1300,1080,865,...,1920,1680,2008,1037,662,1026,575,462,1312,802


In [4]:
# Collect each district's name, following the dictionary's iteration order.
district_names = [ddict[key]['district_name'] for key in ddict]
district_names

['Souris - Elmira',
 'Georgetown - Pownal',
 'Montague - Kilmuir',
 'Belfast - Murray River',
 'Mermaid-Stratford',
 'Stratford-Keppoch',
 'Morell-Donagh',
 'Stanhope-Marshfield',
 'Charlottetown-Hillsborough Park',
 'Charlottetown-Winsloe',
 'Charlottetown-Belvedere',
 'Charlottetown-Victoria Park',
 'Charlottetown-Brighton',
 'Charlottetown-West Royalty',
 'Brackley-Hunter River',
 'Cornwall-Meadowbank',
 'New Haven Rocky Point',
 'Rustico-Emerald',
 'Borden-Kinkora',
 'Kensington-Malpeque',
 'Summerside-Wilmot',
 'Summerside-South Drive',
 'Tyne Valley-Sherbrooke',
 'Evangeline-Miscouche',
 "O'Leary-Inverness",
 'Alberton-Bloomfield',
 'Tignish-Palmer Road']

In [5]:
# Relabel the columns with the district names.
# Assumes district_names is in the same order as the columns of df_counts -- TODO confirm.
df_counts = df_counts.set_axis(district_names, axis=1)
df_counts

Unnamed: 0,Souris - Elmira,Georgetown - Pownal,Montague - Kilmuir,Belfast - Murray River,Mermaid-Stratford,Stratford-Keppoch,Morell-Donagh,Stanhope-Marshfield,Charlottetown-Hillsborough Park,Charlottetown-Winsloe,...,Rustico-Emerald,Borden-Kinkora,Kensington-Malpeque,Summerside-Wilmot,Summerside-South Drive,Tyne Valley-Sherbrooke,Evangeline-Miscouche,O'Leary-Inverness,Alberton-Bloomfield,Tignish-Palmer Road
Green,804,865,675,781,1152,805,697,747,709,1057,...,899,1041,805,1258,1302,1101,761,231,317,584
Ind,0,0,0,0,0,0,0,0,0,0,...,0,54,0,0,0,0,0,0,0,0
Liberal,861,663,785,615,902,882,557,1196,635,1420,...,489,417,389,892,938,882,1100,1102,1153,1388
NDP,0,49,124,0,38,31,35,46,46,41,...,30,32,31,39,65,81,33,898,99,44
PC,1347,1493,1373,1545,934,1270,1752,1300,1080,865,...,1920,1680,2008,1037,662,1026,575,462,1312,802


In [6]:
# Write the named counts table to CSV twice: once as-is, and once transposed.
df_counts.to_csv('elections_transformed/04a_2019_pe_district_counts_named.csv')
df_counts_by_district = df_counts.transpose()
df_counts_by_district.to_csv('elections_transformed/04a_2019_pe_district_counts_by_dist_named.csv')

In [7]:
print('District DataFrames confirmed edited: ')

# Reshape every district's DataFrame in the dictionary into a per-party table.
# NOTE: this cell overwrites ddict[key]['district_df'], so it is not idempotent:
# reload the pickle before running it a second time.
for key in ddict:
    raw_df = ddict[key]['district_df']
    district_name = ddict[key]['district_name']

    # Labels stored in rows 0 and 3 of the first column. After the transpose
    # they are column names; both of those columns are dropped further down.
    quant_polls_col = raw_df.loc[0, 0]
    poll_col = raw_df.loc[3, 0]

    # Work on a copy: make the first column the index, then transpose so that
    # its values become the column names.
    fix_df = raw_df.copy().set_index(0).T
    # Remove index name.
    fix_df.index.name = None

    # Split the candidate label on its first space into two columns.
    # `n` is passed by keyword because pandas 2.0 made every argument of
    # Series.str.split other than `pat` keyword-only.
    fix_df[['Party', 'Name']] = fix_df[quant_polls_col].str.split(' ', n=1, expand=True)
    # Remove the brackets from the Party name.
    fix_df['Party'] = fix_df['Party'].str.replace(r'[()]', '', regex=True)

    # Drop the unneeded columns, set Party as index.
    fix_df = fix_df.drop(columns=[quant_polls_col, poll_col]).set_index('Party')

    # Convert every column other than Name to numeric, stripping any '%' first.
    convert_cols = [col for col in fix_df.columns if col != 'Name']
    for col in convert_cols:
        fix_df[col] = pd.to_numeric(fix_df[col].str.replace('%', '', regex=False))

    # Reorder columns: Name first, then the numeric columns in their existing order.
    fix_df = fix_df[['Name', *convert_cols]]

    # Change the name of the index to the name of the district.
    fix_df.index.name = district_name

    # Store organised DF in dict again.
    ddict[key]['district_df'] = fix_df.copy()

    # Confirm each district is completed.
    print(f'{fix_df.index.name} complete.', end=' ')

District DataFrames confirmed edited: 
Souris - Elmira complete. Georgetown - Pownal complete. Montague - Kilmuir complete. Belfast - Murray River complete. Mermaid-Stratford complete. Stratford-Keppoch complete. Morell-Donagh complete. Stanhope-Marshfield complete. Charlottetown-Hillsborough Park complete. Charlottetown-Winsloe complete. Charlottetown-Belvedere complete. Charlottetown-Victoria Park complete. Charlottetown-Brighton complete. Charlottetown-West Royalty complete. Brackley-Hunter River complete. Cornwall-Meadowbank complete. New Haven Rocky Point complete. Rustico-Emerald complete. Borden-Kinkora complete. Kensington-Malpeque complete. Summerside-Wilmot complete. Summerside-South Drive complete. Tyne Valley-Sherbrooke complete. Evangeline-Miscouche complete. O'Leary-Inverness complete. Alberton-Bloomfield complete. Tignish-Palmer Road complete. 

In [8]:
# Persist the updated district dictionary as a pickle file.
with open('elections_transformed/04_district_dict_fin.pkl', 'wb') as pkl_out:
    pickle.dump(ddict, pkl_out, protocol=pickle.HIGHEST_PROTOCOL)

In [9]:
# Write each district's DataFrame to its own CSV file.
single_digit_prefixes = tuple(digit + '_' for digit in '123456789')
for key in ddict:
    district = ddict[key]
    # File stem is "<key>_df_<district name>" with spaces turned into underscores.
    filename = f"{key}_df_{district['district_name']}".replace(' ', '_')
    # Zero-pad a single-digit leading number (presumably so the files sort in order).
    if filename.startswith(single_digit_prefixes):
        filename = '0' + filename
    district['district_df'].to_csv(f'elections_transformed/individual_districts/{filename}.csv')