In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# Election years to process (one combined CSV is written per year)
years = [
    2011,
]

In [60]:
# Folder that holds the poll-by-poll result files
folderName = "pollresults_resultatsbureau_canada"

# Path to the CSV listing the provinces and their electoral districts
ridingFile = os.path.join(folderName, "RidingListClean.csv")

In [71]:
# Load the riding list and keep only two columns, selected by position:
# column 0 (the province) and column 2 (the electoral district number).
ridingList = pd.read_csv(ridingFile).iloc[:, [0, 2]]
# Restrict the list to the New Brunswick ridings
ridingList = ridingList.loc[ridingList["Province2"] == "New Brunswick"]
ridingList

Unnamed: 0,Province2,Electoral_District_Number_Num_ro_de_circonscription
22,New Brunswick,13001
23,New Brunswick,13002
24,New Brunswick,13003
25,New Brunswick,13004
26,New Brunswick,13005
27,New Brunswick,13006
28,New Brunswick,13007
29,New Brunswick,13008
30,New Brunswick,13009
31,New Brunswick,13010


In [72]:
# Electoral district numbers: the second column of ridingList, by position.
# .iloc is the positional replacement for .ix, which pandas 1.0 removed.
ridings = ridingList.iloc[:, 1]
ridings

22    13001
23    13002
24    13003
25    13004
26    13005
27    13006
28    13007
29    13008
30    13009
31    13010
Name: Electoral_District_Number_Num_ro_de_circonscription, dtype: int64

In [107]:
def percent_by_polling_district(riding, year, folder=None):
    """Build per-polling-station vote counts and vote shares for one riding.

    Reads ``pollresults_resultatsbureau<riding>.csv`` from the results
    folder, pivots it to one row per polling station and one column per
    political affiliation, merges stations whose numbers differ only by
    leading/trailing letters A-G (e.g. "12A" and "12B" become "12"), and
    appends a total column plus one percentage column per affiliation.

    Parameters
    ----------
    riding : int or str
        Electoral district number. Used to build the file name and stored
        in the returned ``District`` column.
    year : int
        Election year. Only used in the progress message; the file path
        does not depend on it.
    folder : str, optional
        Directory containing the results CSV. Defaults to the notebook-level
        ``folderName`` when omitted.

    Returns
    -------
    pandas.DataFrame
        One row per merged polling station with the columns
        ``Polling Station Number``, one vote-count column per affiliation,
        ``Vote Totals``, one ``"<affiliation> (%)"`` column per affiliation
        (rounded to 2 decimals) and ``District``.
    """
    # print(...) with a single string argument behaves the same on
    # Python 2 and Python 3.
    print("Riding: " + str(riding) + ", Year: " + str(year))

    if folder is None:
        folder = folderName
    fileName = "pollresults_resultatsbureau" + str(riding) + ".csv"
    filePath = os.path.join(folder, fileName)
    # Load the data
    pollData = pd.read_csv(filePath)

    # Column headers look like "English/French"; keep the part before '/'
    pollData.columns = [name.split('/')[0] for name in pollData.columns]

    # Drop unnecessary columns
    listColDrop = ['Electoral District Name_English',
                   'Electoral District Name_French',
                   'Void Poll Indicator',
                   'No Poll Held Indicator',
                   'Merge With',
                   'Rejected Ballots for Polling Station',
                   'Political Affiliation Name_French',
                   "Candidate's First Name",
                   "Candidate's Family Name",
                   "Candidate's Middle Name",
                   'Incumbent Indicator',
                   'Elected Candidate Indicator']
    pollData = pollData.drop(listColDrop, axis=1)

    # Strip the polling ID column of surrounding spaces.
    stationCol = 'Polling Station Number'
    pollData[stationCol] = pollData[stationCol].map(
        lambda value: str(value).strip(" "))

    # One row per polling station, one column per political affiliation.
    # NOTE: DataFrame.pivot raises ValueError if a station has more than one
    # row for the same affiliation.
    pollData = pollData.pivot(
        index=stationCol,
        columns='Political Affiliation Name_English',
        values='Candidate Poll Votes Count')
    # Turn the index back into a column
    pollData = pollData.reset_index()

    # Strip the letters off polling stations since the geospatial data
    #  does not include these letters.
    stripCharacters = "ABCDEFG"
    pollData[stationCol] = pollData[stationCol].map(
        lambda value: str(value).strip(stripCharacters))

    # Merge polling stations that now share the same number
    pollData = pollData.groupby(stationCol).sum().reset_index()

    # Every column except the station number holds vote counts
    partyCols = [col for col in pollData.columns if col != stationCol]

    # Get the vote totals
    pollData['Vote Totals'] = pollData[partyCols].sum(axis=1)

    # Share of each affiliation per polling station, as a percentage
    pollDataPercent = pollData[partyCols].div(pollData['Vote Totals'], axis=0)
    pollDataPercent = np.round(pollDataPercent * 100, decimals=2)
    pollDataPercent.columns = [col + " (%)" for col in pollDataPercent.columns]

    # Merge it with the original data set
    pollData = pd.concat([pollData, pollDataPercent], axis=1)

    # Add back column for electoral district
    pollData['District'] = riding

    return pollData

In [108]:
# For each year: build the per-riding tables, stack them into one frame,
# move the identifying columns to the front and write "<year>Combined.csv".
for year in years:
    # Collect every riding's table and concatenate once. Starting from a
    # fresh list each year means a `combined` left over from an earlier
    # year or run can never be reused by accident.
    ridingFrames = []
    for riding in ridings:
        pollData = percent_by_polling_district(riding, year)
        ridingFrames.append(pollData)
    # pd.concat raises ValueError if there were no ridings to process
    combined = pd.concat(ridingFrames, axis=0, ignore_index=True)

    # Reorder columns before writing: District, Polling Station Number,
    # then everything else in its existing order.
    leadCols = ['District', 'Polling Station Number']
    cols = leadCols + [col for col in combined if col not in leadCols]
    # .loc selects by label; it replaces .ix, which pandas 1.0 removed
    combined = combined.loc[:, cols]

    # Make sure the output folder exists so to_csv does not fail on a
    # fresh checkout.
    outputDir = "Output"
    if not os.path.isdir(outputDir):
        os.makedirs(outputDir)

    fileName = str(year) + "Combined.csv"
    filePath = os.path.join(outputDir, fileName)
    combined.to_csv(filePath, index=False, encoding='utf-8')

Riding: 13001, Year: 2011
Riding: 13002, Year: 2011
Riding: 13003, Year: 2011
Riding: 13004, Year: 2011
Riding: 13005, Year: 2011
Riding: 13006, Year: 2011
Riding: 13007, Year: 2011
Riding: 13008, Year: 2011
Riding: 13009, Year: 2011
Riding: 13010, Year: 2011


In [105]:
# Put the identifying columns first: District, then Polling Station Number,
# followed by the remaining columns in their existing order. Re-running this
# cell leaves the frame unchanged.
leadCols = ['District', 'Polling Station Number']
cols = leadCols + [col for col in combined if col not in leadCols]
# .loc selects by label; it replaces .ix, which pandas 1.0 removed
combined = combined.loc[:, cols]

In [106]:
# Preview the first rows only instead of dumping the whole frame
combined.head(10)

Unnamed: 0,District,Polling Station Number,CHP Canada,CHP Canada (%),Conservative,Conservative (%),Green Party,Green Party (%),Independent,Independent (%),Liberal,Liberal (%),NDP-New Democratic Party,NDP-New Democratic Party (%),Vote Totals
0,13001,1,,,21,13.29,,,,,27,17.09,110,69.62,158
1,13001,10,,,29,14.01,,,,,18,8.70,160,77.29,207
2,13001,100,,,28,11.97,,,,,16,6.84,190,81.20,234
3,13001,101,,,28,13.27,,,,,32,15.17,151,71.56,211
4,13001,102,,,25,9.88,,,,,60,23.72,168,66.40,253
5,13001,103,,,17,8.10,,,,,45,21.43,148,70.48,210
6,13001,104,,,9,5.26,,,,,57,33.33,105,61.40,171
7,13001,105,,,9,5.62,,,,,57,35.62,94,58.75,160
8,13001,106,,,25,10.46,,,,,50,20.92,164,68.62,239
9,13001,107,,,19,10.16,,,,,26,13.90,142,75.94,187
