Import necessary libraries

In [97]:
import numpy as py
import pandas as pd
import openpyxl 
from openpyxl.utils.dataframe import dataframe_to_rows

Load and Prep Data

In [98]:
#Load election results
election_results = pd.read_csv('Results.csv')
#Load new constituencies
file = 'New Constituencies.xlsx'
xl = pd.ExcelFile(file)
#Sheet 9 contains mapping of Province names to Old constituency names and new contituency names
new_constituencies = xl.parse('Sheet9')
#Sheet 1 contains mapping of Provinces to new constituency names and the seats per province
seats_df = xl.parse('Sheet1')
nc = pd.DataFrame(new_constituencies['New Constituencies'].unique().tolist())
#Here we are merging data from Sheet 1 and Sheet 9 based on the new constituency names, which is common for both sheets, to obtain the number of seats per new constituency
nc_up = pd.merge(nc, seats_df, left_on=0,right_on = 'New Constituency')
nc_up=nc_up.drop([0,'Province'], axis = 1)

In [99]:
#Separate the election results into preliminary and validated 
#For this project, we will only be using validated election results
results_val = election_results.loc[election_results['Type of results'] == 'validated']
results_prelim = election_results.loc[election_results['Type of results'] == 'preliminary']

In [100]:
# Get aggregated number of votes for each political party
pei=results_val.groupby('Political affiliation')['Votes obtained'].sum()
pei=pei.to_frame()

In [101]:
#Merge the validated election results with the new_constituencies table. This essentially assigns newly created constituencies discussed in our research to vote based on the Provice from where the vote came.
#The electoral district name and province name did not match in some cases, and had to be manually altered. The main issue here was the use of 2 hyphens in Electoral district name. 
df_combo = pd.merge(results_val,new_constituencies, left_on="Electoral district name",right_on ="Provinces")

In [102]:
#Drop columns that will not be used
df_combo=df_combo.drop(['Provinces','Appartenance politique','% Votes obtained'], axis = 1)
df_combo1 = df_combo.drop(['Type of results','Surname','Given name','Rejected ballots','Total number of ballots cast'], axis = 1)
df_combo1 = df_combo1.drop(["Middle name(s) "], axis = 1)

In [103]:
#Aggregate the votes obtained per party by each of the new constituencies that are discussed in the research
votesgrouped = df_combo1.groupby(['New Constituencies','Political affiliation'])["Votes obtained"].apply(lambda x : x.astype(int).sum())

In [104]:
votesgrouped.to_csv('Votes.csv',header=True,encoding='utf-8-sig')
votesgrouped = pd.read_csv('Votes.csv')

In [105]:
#Only select the parties that are classified as popular parties
array = ['Liberal','Conservative','Bloc Québécois','Green Party','NDP-New Democratic Party']
votesgrp1=votesgrouped.loc[votesgrouped['Political affiliation'].isin(array)]
#votesgrp1=votesgrouped

In [106]:
#pei_new=votesgrp1.groupby('Political affiliation')['Votes obtained'].sum()
pei_new=votesgrp1

In [107]:
#rename the original Votes Obtained column to Votes_Seat0
pei_new.rename(columns={'Votes obtained': 'Votes_Seat0'}, inplace=True)

In [108]:
df_merge = pd.merge(pei_new, nc_up, left_on='New Constituencies',right_on ='New Constituency')
df_merge = df_merge.rename(columns={'New Constituencies': 'New_Constituencies','Votes_Seat1': 'Votes_Seat0'})

In this section we implement the D'Hondt formula. 
The requirement for this implementation is that there will be atleast one seat assignment. This implementation then allows the user to see the seat distributions between various parties based on any number of seat assignments.

In [109]:
wb = openpyxl.Workbook()

In [110]:
wb.save("DHondt Results.xlsx") 

In [111]:
for j in range(1,nc_up.shape[0]): #for each of the 57 new constituencies, we will calculate the seat allocation per party
    #The first step here is to get the results for each constituency
    aa=df_merge[df_merge.New_Constituencies==nc_up['New Constituency'].iloc[j]]
    #Next we assign a new variable seats with the number of seats designated for constituency j
    seats = int(nc_up['Number of Seats'].iloc[j])
    #A new variable, first_max_new is created to store the maximum number of votes a party in constituency j received
    first_max_new = pei_new['Votes_Seat0'].max()
    # Here a new variable seat_new is being initialized. 
    seat_new = 0
    aa['Seats_Seat0'] = seat_new
    aa.reset_index(inplace=True)
    #If the number of votes of a party for constituency j is the highest, then that party for constituency j is awarded a seat.
    aa.loc[aa['Votes_Seat0'] == first_max_new, 'Seats_Seat0'] = 1
    for i in range (1,(seats+1)):
        #The following 4 lines of code is used to get the previous and current vote values and seat allocations
        col_name_votes = 'Votes_Seat'+str(i)
        col_name_votes_prev = 'Votes_Seat'+str(i-1)
        col_name_seats = 'Seats_Seat'+str(i)
        col_name_seats_prev = 'Seats_Seat'+str(i-1)
        
        #In this line of code, we are calculating the number of votes a party will receive. This is the DHondt formula
        aa[col_name_votes]=aa[col_name_votes_prev]/(1+aa[col_name_seats_prev])
        #In this line of code we are assigning the highest number of new votes from the previous calculation to a variable. 
        max_val = aa[col_name_votes].max()
        aa[col_name_seats] = aa[col_name_seats_prev]
        #Here we are checking which party's new votes matches the max_val votes, then that party is assigned a seat.
        aa.loc[aa[col_name_votes] == max_val, col_name_seats] = aa[col_name_seats_prev] + 1
    #Creating a new sheet in the excel file for each new constituency
    sheetname = nc_up['New Constituency'].iloc[j]
    sheet=wb.create_sheet(index = 1 , title = sheetname[:10]) 
    aa=aa.drop(['Seats_Seat0'], axis=1)
    
    for r in dataframe_to_rows(aa, index=True, header=True):
        sheet.append(r)
wb.save("DHondt Results.xlsx") 
    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
