In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# List of the years we're interested in (currently only 2011).
# Each entry is passed as the `year` argument in the processing loop.
years = [2011]

In [60]:
# Folder that holds the riding list and the per-riding poll result CSVs
folderName = "pollresults_resultatsbureau_canada"

# This file contains the provinces and their electoral districts
ridingFile = os.path.join(folderName, "RidingListClean.csv")

In [71]:
# Load the riding list and keep two columns by position: the province name
# (position 0, "Province2") and the electoral district number (position 2).
ridingList = pd.read_csv(ridingFile).iloc[:, [0, 2]]
# Restrict the list to the New Brunswick ridings.
ridingList = ridingList.loc[ridingList["Province2"] == "New Brunswick"]
ridingList

Unnamed: 0,Province2,Electoral_District_Number_Num_ro_de_circonscription
22,New Brunswick,13001
23,New Brunswick,13002
24,New Brunswick,13003
25,New Brunswick,13004
26,New Brunswick,13005
27,New Brunswick,13006
28,New Brunswick,13007
29,New Brunswick,13008
30,New Brunswick,13009
31,New Brunswick,13010


In [72]:
# The riding numbers live in the second column of ridingList; select it by
# position. `.iloc` replaces the old `.ix` indexer, which was deprecated in
# pandas 0.20 and removed in pandas 1.0.
ridings = ridingList.iloc[:, 1]
ridings

22    13001
23    13002
24    13003
25    13004
26    13005
27    13006
28    13007
29    13008
30    13009
31    13010
Name: Electoral_District_Number_Num_ro_de_circonscription, dtype: int64

In [73]:
def percent_by_polling_district(riding, year, folder=None):
    """Build a per-polling-station vote table, with percentages, for one riding.

    Reads ``pollresults_resultatsbureau<riding>.csv`` from ``folder``, pivots
    the rows so each political affiliation becomes a column of vote counts,
    merges polling stations that differ only by a letter in ``ABCDEFG``
    (e.g. ``12A`` and ``12B`` become ``12``), and appends a ``Vote Totals``
    column plus one ``<affiliation> (%)`` column per affiliation.

    Parameters
    ----------
    riding : int or str
        Electoral district number; used to build the CSV file name.
    year : int
        Only used in the progress message; it does not influence which
        file is read.
    folder : str, optional
        Directory containing the CSV. When omitted, the notebook-level
        ``folderName`` variable is used (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        One row per merged polling station with the columns
        ``Polling Station Number``, one count column per affiliation,
        ``Vote Totals`` and one ``<affiliation> (%)`` column per affiliation
        (rounded to two decimals). A station with zero total votes yields
        NaN percentages.

    Raises
    ------
    KeyError
        If one of the columns listed in ``listColDrop`` is missing.
    ValueError
        If the same station/affiliation pair occurs more than once, since
        ``DataFrame.pivot`` cannot reshape duplicate entries.
    """
    if folder is None:
        # Fall back to the data folder configured at notebook level.
        folder = folderName

    # Function-call form of print works on both Python 2 and Python 3.
    print("Riding: " + str(riding) + ", Year: " + str(year))

    fileName = "pollresults_resultatsbureau" + str(riding) + ".csv"
    filePath = os.path.join(folder, fileName)
    # Load the data
    pollData = pd.read_csv(filePath)

    # Column headers are bilingual ("English/French"); keep the English part.
    pollData.columns = [name.split('/')[0] for name in pollData.columns]

    # Drop unnecessary columns
    listColDrop = ['Electoral District Name_English',
                   'Electoral District Name_French',
                   'Void Poll Indicator',
                   'No Poll Held Indicator',
                   'Merge With',
                   'Rejected Ballots for Polling Station',
                   'Political Affiliation Name_French',
                   "Candidate's First Name",
                   "Candidate's Family Name",
                   "Candidate's Middle Name",
                   'Incumbent Indicator',
                   'Elected Candidate Indicator']
    pollData = pollData.drop(listColDrop, axis=1)

    stationCol = 'Polling Station Number'
    partyCol = 'Political Affiliation Name_English'
    votesCol = 'Candidate Poll Votes Count'

    # Strip the polling ID column of whitespace.
    pollData[stationCol] = pollData[stationCol].map(
        lambda x: str(x).strip(" "))

    # Pivot to one row per polling station and one column per affiliation,
    # then turn the station index back into an ordinary column.
    pollData = pollData.pivot(
        index=stationCol,
        columns=partyCol,
        values=votesCol).reset_index()

    # Strip the letters off polling stations since the geospatial data
    #  does not include these letters.
    stripCharacters = "ABCDEFG"
    pollData[stationCol] = pollData[stationCol].map(
        lambda x: str(x).strip(stripCharacters))

    # Merge polling stations that now share the same number.
    pollData = pollData.groupby(stationCol).sum().reset_index()

    # Select the affiliation columns by name rather than by position so the
    # totals and percentages never depend on column order.
    partyCols = [col for col in pollData.columns if col != stationCol]

    # Get the vote totals
    pollData['Vote Totals'] = pollData[partyCols].sum(axis=1)

    # Divide each affiliation's votes by the station total and express the
    # result as a percentage rounded to two decimals.
    pollDataPercent = pollData[partyCols].div(pollData['Vote Totals'], axis=0)
    pollDataPercent = np.round(pollDataPercent * 100, decimals=2)
    pollDataPercent.columns = [col + " (%)" for col in partyCols]

    # Merge it with the original data set
    return pd.concat([pollData, pollDataPercent], axis=1)

In [74]:
# Process every riding/year combination and keep each result, keyed by
# (riding, year). Previously every table except the last one was discarded
# because `pollData` was overwritten on each iteration.
pollDataByRiding = {}
for riding in ridings:
    for year in years:
        # `pollData` is still rebound each time, so after the loop it holds
        # the table for the last riding/year processed.
        pollData = percent_by_polling_district(riding, year)
        pollDataByRiding[(riding, year)] = pollData

Riding: 13001, Year: 2011
Riding: 13002, Year: 2011
Riding: 13003, Year: 2011
Riding: 13004, Year: 2011
Riding: 13005, Year: 2011
Riding: 13006, Year: 2011
Riding: 13007, Year: 2011
Riding: 13008, Year: 2011
Riding: 13009, Year: 2011
Riding: 13010, Year: 2011


In [75]:
# Display the table returned by the final call in the loop above, i.e. the
# last riding/year combination that was processed.
pollData

Political Affiliation Name_English,Polling Station Number,Conservative,Green Party,Liberal,NDP-New Democratic Party,Vote Totals,Conservative (%),Green Party (%),Liberal (%),NDP-New Democratic Party (%)
0,1,62,3,25,20,110,56.36,2.73,22.73,18.18
1,1-1,91,0,50,50,191,47.64,0.00,26.18,26.18
2,10,118,4,74,59,255,46.27,1.57,29.02,23.14
3,100,118,4,16,29,167,70.66,2.40,9.58,17.37
4,101,85,4,17,26,132,64.39,3.03,12.88,19.70
5,102,157,7,17,26,207,75.85,3.38,8.21,12.56
6,103,92,3,11,19,125,73.60,2.40,8.80,15.20
7,104,167,6,24,38,235,71.06,2.55,10.21,16.17
8,105,87,7,16,18,128,67.97,5.47,12.50,14.06
9,106,177,6,30,43,256,69.14,2.34,11.72,16.80
