In [3]:
import os, csv, sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import PMM
# Minimum share of the popular vote (0.05 = 5%) a party must reach to be
# treated as a major party; parties below it are pooled into "OTH" further down.
MAJOR_PARTY_THRESH = 0.05

In [4]:
# ================ import data ===============
pathstr = "data/raw_2011_copy/"

# Infer the election year from the folder name.
# Every run of consecutive digits in the path is considered on its own, so
# unrelated digits elsewhere in the path (e.g. "data_v2/raw_2011/") are not
# glued onto the year.  The concatenation of all digits is kept as a final
# candidate so that any path accepted previously is still accepted.
digit_runs = "".join(ch if ch.isdigit() else " " for ch in pathstr).split()
assert digit_runs, "Cannot determine year from path provided. Please include the election year in the name of your folder"
year_candidates = [int(run) for run in digit_runs]
year_candidates.append(int("".join(digit_runs)))
plausible_years = sorted({cand for cand in year_candidates if 1867 < cand < 2050})
assert plausible_years, "Inferred implausible year=%d, please check folder names for appropriate year"%year_candidates[-1]
assert len(plausible_years) == 1, "Ambiguous election year in path, candidates=%s; please keep a single year in the folder names"%plausible_years
year = plausible_years[0]

# Define output paths (the output folder is created on first use):
out_dir = os.path.join(pathstr, "PMM_out")
os.makedirs(out_dir, exist_ok=True)
f_qlist_out     = os.path.join(out_dir, "PMM_qlist.tsv")
f_standings_out = os.path.join(out_dir, "PMM_standings.tsv")

# info on total # votes cast & turnout.
T3_path = os.path.join(pathstr, "table_tableau03.csv")
# info on seats awarded
T7_path = os.path.join(pathstr, "table_tableau07.csv")
# Valid votes by party:
T8_path = os.path.join(pathstr, "table_tableau08.csv")

# character encoding changed in 2019: older tables are read as ISO-8859-1,
# newer ones with the pandas default.
read_opts = {"encoding": "ISO-8859-1"} if year < 2019 else {}
df_Nvotes  = pd.read_csv(T3_path, index_col="Province", **read_opts)
Seats_init = pd.read_csv(T7_path, index_col="Province", **read_opts)
VV_bp      = pd.read_csv(T8_path, index_col=0, **read_opts)

In [5]:
# Tally the ballots across all provinces.  Columns of df_Nvotes are addressed
# by position: 6 is used as the overall total, 2 as the valid ballots and
# 4 as the invalid ballots (presumably the layout of table 3 - TODO confirm).
N_total_votes = df_Nvotes.iloc[:, 6].sum()
n_valid_votes = df_Nvotes.iloc[:, 2].sum()
n_invalid_votes = df_Nvotes.iloc[:, 4].sum()

print("Total votes : %d " % N_total_votes)
print("Percent votes valid  : %.4f " % (100*n_valid_votes/N_total_votes))
print("Percent votes invalid: %.4f " % (100*n_invalid_votes/N_total_votes))

Total votes : 14823408 
Percent votes valid  : 99.3293 
Percent votes invalid: 0.6707 


In [6]:
# Each party's share of all ballots cast: its votes summed over the columns
# of VV_bp, divided by the overall total (N_total_votes).
votes_per_party = VV_bp.sum(axis=1)
Pop_vote_share = votes_per_party / N_total_votes

In [8]:
# Select parties over threshold support
maj_parties = Pop_vote_share.index[ Pop_vote_share >= MAJOR_PARTY_THRESH ]
Vote_counts = VV_bp.loc[maj_parties].sum(axis=1)

# Relabel the major parties with their short codes from PMM.party_abbrev.
Vote_counts.index = [ PMM.party_abbrev[key] for key in Vote_counts.index ]
maj_parties       = [ PMM.party_abbrev[key] for key in maj_parties ]

# All other parties are grouped together as "OTHER"
# i.e. those who are _explicitly_ independent, as well as those whose
# parties are relegated to "independence" by falling below threshold support.
Vote_counts["OTH"] = VV_bp.loc[Pop_vote_share < MAJOR_PARTY_THRESH].sum(axis=1).sum()

# "SPL" captures all rejected (i.e. 'spoiled') ballots.
# The column is selected by position with .iloc *before* summing: indexing the
# column-sum Series with a bare integer relies on deprecated positional
# fallback, and summing every column first can fail on non-numeric columns.
Vote_counts["SPL"] = df_Nvotes.iloc[:, 4].sum()

In [13]:
# Seats per party as returned by PMM.get_party_seat_standings for the
# major parties, and their overall total.
Const_Seats = PMM.get_party_seat_standings(Seats_init, maj_parties)
Seats_total_init = sum(Const_Seats)

# One row per party: vote count alongside initial seat count.
standings_columns = {"Votes": Vote_counts, "Seats_init": Const_Seats}
Standings = pd.DataFrame(standings_columns)

In [14]:
# Build one PMM.party object per row of Standings, keyed by the party label.
# Column 0 of Standings holds the votes and column 1 the initial seats.
all_parties = {
    label: PMM.party(label,
                     Standings.iloc[row, 0],
                     Standings.iloc[row, 1],
                     N_total_votes,
                     Seats_total_init)
    for row, label in enumerate(Standings.index)
}

In [25]:
# Party labels (a live view onto the keys of all_parties) and their count.
Num_parties     = len(all_parties)
party_namelist  = all_parties.keys()

In [26]:
# Pool every party's quotients into a single list, largest first.
Total_quotient_list = sorted(
    (quotient
     for name in party_namelist
     for quotient in all_parties[name].party_quotient_list),
    reverse=True,
)

In [27]:
# Sanity check: all constituency seats are assigned, and no others are.
# The first Seats_total_init quotients (indices 0 .. Seats_total_init-1) must
# all be flagged as assigned and everything after them must not be.  The slice
# bound is Seats_total_init itself so that the last constituency quotient is
# checked too.
constituency_quotients = Total_quotient_list[:Seats_total_init]
remaining_quotients    = Total_quotient_list[Seats_total_init:]
assert all(seat.assigned for seat in constituency_quotients) and not any(seat.assigned for seat in remaining_quotients), "ERROR: Total_quotient_list not properly sorted, or inconsistent with expected seat number."

In [34]:
# Keep only the top 2*Seats_total_init quotients; nothing beyond that point
# has any chance of being considered.
shortlist = Total_quotient_list[:2*Seats_total_init]

# Column name -> how to read that column's value off a quotient object.
qlist_fields = {
    "j":        lambda q: q.jval,
    "Value":    lambda q: q.value,
    "Assigned": lambda q: int(q.assigned),
    "party":    lambda q: q.party_att,
}
Qlist = pd.DataFrame({col: [read(q) for q in shortlist]
                      for col, read in qlist_fields.items()})

# --- Document the quotient list in a tsv file:
Qlist.round(2).to_csv(f_qlist_out, sep="\t")

In [39]:
# Value of the quotient at position Seats_total_init-1 of the sorted list,
# i.e. the last of the first Seats_total_init entries.
approx_Threshold = Total_quotient_list[Seats_total_init-1].value

# Start from the first unassigned seat: both the position in the quotient
# list and the running count of seats handed out begin at Seats_total_init.
sval = total_seats_assigned = Seats_total_init

In [40]:
# Walk through the unassigned quotients in descending order and hand out
# additional seats to parties that are still owed more than one seat.
#
# Hard cut-off at 2*Seats_total_init no matter proportionality status
# (very unlikely that this will be approached).  The bound is also capped at
# the length of the list so the loop can never index past its end.
last_index = min(2*Seats_total_init, len(Total_quotient_list))
while sval < last_index:
    # Read the quotient at sval *before* advancing.  sval starts at
    # Seats_total_init, the first unassigned quotient; incrementing first
    # would skip that entry and read one position past the cut-off.
    current_party = Total_quotient_list[sval].party_att
    sval += 1

    if current_party in ("SPL", "OTH"):
        # Skip OTHER, SPOILED quotients.
        continue

    party = all_parties[current_party]
    # Seats the party would hold at its vote share of the current total,
    # minus the seats it already holds.
    seats_owed = party.vote_share*total_seats_assigned - party.seats_assigned
    if seats_owed > 1:
        # This quotient belongs to a real party that is owed more than one
        # seat: give it an extra seat.
        party.seats_assigned += 1
        total_seats_assigned += 1

In [41]:
# Summarise every party object in one table: initial and final seat counts,
# votes, and the corresponding vote and seat shares.
party_labels  = list(all_parties)
party_objects = [all_parties[label] for label in party_labels]

Standings_final = pd.DataFrame({
    "Party":         party_labels,
    "Seats_initial": [pty.Seats_initial  for pty in party_objects],
    "Votes":         [pty.Votes          for pty in party_objects],
    "Vote_share":    [pty.vote_share     for pty in party_objects],
    "Seats_final":   [pty.seats_assigned for pty in party_objects],
    "Seat_share":    [pty.seats_assigned/total_seats_assigned for pty in party_objects],
})


In [42]:
# Display the final standings table (one row per party).
Standings_final

Unnamed: 0,Party,Seats_initial,Votes,Vote_share,Seats_final,Seat_share
0,BLQ,4,891425.0,0.060136,22,0.059459
1,CON,166,5835270.0,0.393652,166,0.448649
2,LIB,34,2783076.0,0.187749,69,0.186486
3,NDP,103,4512411.0,0.304411,112,0.302703
4,OTH,1,701798.0,0.047344,1,0.002703
5,SPL,0,99428.0,0.006707,0,0.0


In [43]:
# Write the final standings, rounded to two decimals, to the tab-separated
# output file.
standings_rounded = Standings_final.round(2)
standings_rounded.to_csv(f_standings_out, sep="\t")