In [1]:
import pyrankvote
import ast
import pandas as pd
from pyrankvote import Candidate, Ballot


def get_cand_list(df):
    """Return the distinct values found in the frame's second column.

    Values are ordered as ``value_counts`` orders them (most frequent first)
    and the placeholder string '0' is left out.

    NOTE(review): only column position 1 is inspected, so a candidate that
    never appears in that column would be missing from the result — confirm
    this matches the intended column layout.
    """
    counts_by_value = df.iloc[:, 1].value_counts()
    return [name for name in counts_by_value.index if name != '0']

def initialize_cand_objs(cand_list):
    """Wrap candidate names in pyrankvote ``Candidate`` objects.

    ``cand_list`` is either one string of names separated by ', ' or an
    iterable of names. A list of ``Candidate`` objects is returned in the
    same order as the names.
    """
    names = cand_list.split(', ') if isinstance(cand_list, str) else cand_list
    return [Candidate(name) for name in names]

def initialize_cand_objs_in_df_cols(df):
    """Return the candidate columns of ``df`` with every cell wrapped in ``Candidate``.

    Column 0 of ``df`` is treated as the ballot id; every later column is
    treated as holding candidate names. The result has the converted
    candidate columns followed by a 'ballot_id' column copied from column 0.
    ``df`` itself is not modified.

    Every cell is converted, including any '0' placeholder values, so filter
    those out beforehand if they should not become candidates.
    """
    # The original called an undefined ``create_cand`` helper; ``Candidate`` is
    # the constructor the rest of this notebook uses for the same purpose.
    # Mapping column by column avoids the deprecated ``DataFrame.applymap``.
    cands_df = df.iloc[:, 1:].apply(lambda col: col.map(Candidate))
    cands_df['ballot_id'] = df.iloc[:, 0]  # re-attach the ids from the source frame
    return cands_df

def get_cands_into_single_cell(df):
    """Add a 'candidate_list' column with each row's values joined by ', '.

    All columns except the first are joined, left to right. ``df`` is
    modified in place and the same object is returned.

    NOTE(review): the first column is skipped by the ``1:`` slice. If the
    caller has already dropped the id column, this leaves out the first
    ranked choice — confirm against the caller's column layout.
    """
    ranked_columns = df.iloc[:, 1:]
    joined_names = ranked_columns.agg(", ".join, axis=1)
    df['candidate_list'] = joined_names
    return df

def initialize_ballot_objs(df):
    """Build one pyrankvote ``Ballot`` per row of ``df``.

    Each entry of the 'candidate_list' column is passed as the ballot's
    ``ranked_candidates``; the ballots are returned as a list in row order.
    """
    return [Ballot(ranked_candidates=ranking) for ranking in df['candidate_list']]

def run_election(list_of_cand_objs, election_df):
    """Run pyrankvote instant-runoff voting and return its result object.

    ``list_of_cand_objs`` is the list of candidates standing in the election
    and the ballots are taken from the 'ballots' column of ``election_df``.
    ``pick_random_if_blank=True`` is always passed to pyrankvote.
    """
    ballots = election_df['ballots']
    result = pyrankvote.instant_runoff_voting(
        list_of_cand_objs,
        ballots,
        pick_random_if_blank=True,
    )
    return result

def rm_invalid_rows(df):
    """Return the rows of ``df`` whose 'candidate_list' is not exactly '0'.

    The input frame is left untouched; surviving rows keep their index labels.
    """
    is_valid = df['candidate_list'].ne('0')
    return df.loc[is_valid]


def _drop_zero_tokens(joined_names):
    """Remove '0' placeholder entries from a ', '-separated ranking string.

    Whole entries are compared, so a name that merely ends in '0'
    (e.g. 'candidate_10') is left intact. When nothing but placeholders
    was present, the single string '0' is returned so that
    ``rm_invalid_rows`` still recognises the row as empty.
    """
    kept = [name for name in joined_names.split(', ') if name != '0']
    return ', '.join(kept) if kept else '0'


def main(df):
    """Run one instant-runoff election from a frame of ranked ballots.

    Parameters
    ----------
    df : pandas.DataFrame
        Ballots of a single election. Must contain a 'filename' column and
        either a 'ballot_id' or an 'Unnamed: 0' column; the remaining columns
        hold candidate names, with the string '0' as the empty placeholder.
        The caller's frame is not modified.

    Returns
    -------
    (pandas.DataFrame, dict)
        The processed ballot frame (with 'candidate_list' and 'ballots'
        columns) and a dictionary with the keys 'winner' (the value of
        ``election.get_winners()``), 'rounds' (number of rounds) and
        'election' (the pyrankvote result object).

    NOTE(review): ``get_cands_into_single_cell`` joins every column except the
    first one that remains after the id/filename columns are dropped, and
    ``get_cand_list`` reads only the second one — confirm both match the
    intended column layout.
    """
    # Results collected for this election: winners, round count, result object.
    election_dict = {}

    # Rename on a new frame; an in-place rename would mutate (a slice of) the
    # caller's data.
    df = df.rename(columns={'Unnamed: 0': 'ballot_id'})

    # Drop columns that are entirely NaN, then the non-candidate columns.
    df = df.dropna(axis=1, how='all')
    df = df.drop(['ballot_id', 'filename'], axis=1)
    print('df.shape',df.shape)

    # Aggregate each row's candidates into a single ', '-separated cell.
    df = get_cands_into_single_cell(df)

    # Strip the '0' placeholders entry by entry (not by substring).
    df['candidate_list'] = df['candidate_list'].apply(_drop_zero_tokens)

    # Remove ballots that rank nobody; copy so the column assignments below
    # write to an independent frame rather than a filtered view.
    df = rm_invalid_rows(df).copy()

    # Turn each ranking string into pyrankvote Candidate objects, then ballots.
    df['candidate_list'] = df['candidate_list'].apply(initialize_cand_objs)
    df['ballots'] = initialize_ballot_objs(df)

    # Candidates standing in this election.
    cand_list = initialize_cand_objs(get_cand_list(df))
    print("cand_list: ",cand_list)

    # Run the election and collect the results.
    election = run_election(cand_list, df)
    winner = election.get_winners()

    print("get_winners",winner)
    election_dict['winner'] = winner

    round_count = len(election.rounds)
    print("rounds",round_count)
    election_dict['rounds'] = round_count

    print("election:", election)
    election_dict['election'] = election

    # The frame and the dictionary are both returned so callers can read votes.
    return df, election_dict
    

In [2]:
# Load the combined ballots file; the unnamed leading CSV column becomes 'ballot_id'.
df = pd.read_csv('master_elections.csv').rename(columns={'Unnamed: 0': 'ballot_id'})

# One specific (filename, ballot_id) row is excluded from each of these files.
rows_to_exclude = [
    ("election_05-04-2020_08-39-40_5cands_12noise.csv", 2767),
    ("election_04-30-2020_21-07-57_4_0.01.csv", 20726),
    ("election_05-04-2020_08-40-23_5cands_16noise.csv", 41363),
]
for excluded_file, excluded_ballot_id in rows_to_exclude:
    df = df[(df['filename'] != excluded_file) | (df['ballot_id'] != excluded_ballot_id)]

# Independent snapshot of the filtered ballots.
master_df = df.copy(deep=True)


# Distinct election files in order of first appearance, plus an
# index -> filename lookup used later to label results.
election_ids = df.filename.unique()

election_num_dict = dict(enumerate(election_ids))

  interactivity=interactivity, compiler=compiler, result=result)


In [6]:
# Maps the number printed after "ROUND" in a section header to the key the
# winner's vote count for that round is stored under. Rounds beyond the fourth
# have no key of their own; only the FINAL section is recorded after them.
_ROUND_VOTE_KEYS = {
    '1': 'firstRoundVotes',
    '2': 'secondRoundVotes',
    '3': 'thirdRoundVotes',
    '4': 'fourthRoundVotes',
}


def read_votes(file, election_dict):
    """Record the winner's vote count per round from a printed election report.

    Parameters
    ----------
    file : str
        Path of a text file containing the report: sections introduced by a
        header line containing 'ROUND <n>' or 'FINAL', each followed by one
        row per candidate that starts with the candidate's name.
    election_dict : dict
        Must hold the winner's name (a string) under 'winner'. The keys
        'firstRoundVotes' .. 'fourthRoundVotes' and 'finalRoundVotes' are
        added in place for the sections present in the file; keys that are
        already set are not overwritten.

    Returns
    -------
    None

    If 'winner' is missing or is not a single name (for example a list of
    several winners), nothing is recorded.
    """
    # Read the winner from the dictionary instead of a notebook-level global,
    # so the result cannot depend on which cell last assigned ``winner``.
    winner = election_dict.get('winner')
    if not isinstance(winner, str) or not winner:
        return

    # Keys to fill from candidate rows of the section currently being read.
    # Empty until the first header, so stray leading rows are ignored.
    active_keys = []

    with open(file, 'r') as report:
        for line in report:
            row = line.strip()

            if 'ROUND' in row or 'FINAL' in row:
                # Header line: work out which section starts here. The round
                # number is matched as a whole token, so 'ROUND 10' is not
                # mistaken for 'ROUND 1'.
                tokens = row.split()
                active_keys = [
                    _ROUND_VOTE_KEYS[number]
                    for label, number in zip(tokens, tokens[1:])
                    if label == 'ROUND' and number in _ROUND_VOTE_KEYS
                ]
                if 'FINAL' in row:
                    active_keys.append('finalRoundVotes')
                continue

            # Candidate rows start with the name followed by whitespace; the
            # boundary check keeps 'candidate_1' from matching 'candidate_10'.
            if not row.startswith(winner):
                continue
            remainder = row[len(winner):]
            if not remainder or not remainder[0].isspace():
                continue

            # The vote count is the last all-digit token on the row.
            counts = [int(token) for token in row.split() if token.isdigit()]
            if not counts:
                continue
            for key in active_keys:
                election_dict.setdefault(key, counts[-1])
                    
                    
            
    
# rounds = ['ROUND 1', 'ROUND 2', 'ROUND 3',.....,'FINAL ROUND']
# for i in range (len (rounds)):
#     print("round {}.{}".format(i + 1, rounds[i]))
    

In [7]:
# Per-election results, keyed by the election's position in election_ids.
all_elect_dict={}

# Scratch file: the printed election report is written here so that
# read_votes can parse the winner's per-round vote counts back out of it.
result_file = 'out.txt'

# Run main() on every election's ballots and collect the results.
for election_num, election in enumerate(election_ids):

    print(election_num, election)
    print("***********Election: ", election_num)
    # Copy the slice so main() works on its own frame, not a view of df.
    df_election = df.loc[df['filename']==election].copy()

    temp_df,election_dict = main(df_election)
    print(election_dict)

    if len(election_dict['winner'])==1:
        winner = election_dict['winner'][0].name
        election_dict['winner'] = winner

        with open(result_file, 'w') as file1:
            print(election_dict['election'], file=file1)
        read_votes(result_file, election_dict) #<<<==== where we actually call read_votes
    else:
        # Without a single winner there is no name to look up; calling
        # read_votes here would silently reuse the previous election's winner.
        print(election, "has more than one winner")
    all_elect_dict[election_num] = election_dict

0 election_05-04-2020_08-40-23_5cands_16noise.csv
***********Election:  0
df.shape (41363, 5)
cand_list:  [<Candidate('candidate_1')>, <Candidate('candidate_5')>, <Candidate('candidate_2')>, <Candidate('candidate_3')>, <Candidate('candidate_4')>]
get_winners [<Candidate('candidate_1')>]
rounds 4
election: ROUND 1
Candidate      Votes  Status
-----------  -------  --------
candidate_1     9129  Hopeful
candidate_5     9010  Hopeful
candidate_2     8455  Hopeful
candidate_3     7911  Hopeful
candidate_4     6858  Rejected

ROUND 2
Candidate      Votes  Status
-----------  -------  --------
candidate_1    10903  Hopeful
candidate_5    10640  Hopeful
candidate_3    10078  Hopeful
candidate_2     9742  Rejected
candidate_4        0  Rejected

ROUND 3
Candidate      Votes  Status
-----------  -------  --------
candidate_1    14070  Hopeful
candidate_3    13675  Hopeful
candidate_5    13618  Rejected
candidate_2        0  Rejected
candidate_4        0  Rejected

FINAL RESULT
Candidate      Vo

In [8]:
# Display the collected per-election result dictionaries.
all_elect_dict

{0: {'winner': 'candidate_1',
  'rounds': 4,
  'election': <ElectionResults(4 rounds)>,
  'firstRoundVotes': 9129,
  'secondRoundVotes': 10903,
  'thirdRoundVotes': 14070,
  'finalRoundVotes': 20697},
 1: {'winner': 'candidate_1',
  'rounds': 3,
  'election': <ElectionResults(3 rounds)>,
  'firstRoundVotes': 13918,
  'secondRoundVotes': 16901,
  'finalRoundVotes': 22934},
 2: {'winner': 'candidate_4',
  'rounds': 4,
  'election': <ElectionResults(4 rounds)>,
  'firstRoundVotes': 7502,
  'secondRoundVotes': 9065,
  'thirdRoundVotes': 11522,
  'finalRoundVotes': 17361},
 3: {'winner': 'candidate_1',
  'rounds': 3,
  'election': <ElectionResults(3 rounds)>,
  'firstRoundVotes': 3400,
  'secondRoundVotes': 4475,
  'finalRoundVotes': 5756},
 4: {'winner': 'candidate_2',
  'rounds': 3,
  'election': <ElectionResults(3 rounds)>,
  'firstRoundVotes': 11236,
  'secondRoundVotes': 15597,
  'finalRoundVotes': 24970},
 5: {'winner': 'candidate_4',
  'rounds': 3,
  'election': <ElectionResults(3 ro

In [10]:
# rounds = [1,2,3,4]
# print("round {}".format(rounds[i] for i in len(rounds)))

In [9]:
# rounds = ['ROUND 1', 'ROUND 2', 'ROUND 3', 'FINAL ROUND']
# for i in range (len (rounds)):
#     print("round {}.{}".format(i + 1, rounds[i]))

In [11]:
# Empty placeholder; a later cell rebinds this name to the real results frame.
election_results_df=pd.DataFrame()
# Accumulates one result dictionary per election.
election_results_list=[]

In [14]:
# Rebuild the list inside this cell so that re-running the cell does not
# append every election a second time.
election_results_list = []
for key, val in all_elect_dict.items():
    # Label each result with its source file name (added to the dict in place).
    val['election_id'] = election_num_dict[key]
    print(val)

    election_results_list.append(val)

{'winner': 'candidate_1', 'rounds': 4, 'election': <ElectionResults(4 rounds)>, 'firstRoundVotes': 9129, 'secondRoundVotes': 10903, 'thirdRoundVotes': 14070, 'finalRoundVotes': 20697, 'election_id': 'election_05-04-2020_08-40-23_5cands_16noise.csv'}
{'winner': 'candidate_1', 'rounds': 3, 'election': <ElectionResults(3 rounds)>, 'firstRoundVotes': 13918, 'secondRoundVotes': 16901, 'finalRoundVotes': 22934, 'election_id': 'election_04-30-2020_21-00-03_4_0.0225.csv'}
{'winner': 'candidate_4', 'rounds': 4, 'election': <ElectionResults(4 rounds)>, 'firstRoundVotes': 7502, 'secondRoundVotes': 9065, 'thirdRoundVotes': 11522, 'finalRoundVotes': 17361, 'election_id': 'election_05-02-2020_08-18-17_5cands_3noise.csv'}
{'winner': 'candidate_1', 'rounds': 3, 'election': <ElectionResults(3 rounds)>, 'firstRoundVotes': 3400, 'secondRoundVotes': 4475, 'finalRoundVotes': 5756, 'election_id': 'election_05-04-2020_08-38-34_4cands_12noise.csv'}
{'winner': 'candidate_2', 'rounds': 3, 'election': <ElectionR

In [15]:
# One row per election; keys missing from a result dictionary become NaN.
election_results_df = pd.DataFrame(data=election_results_list)
# Persist the table without the positional index column.
election_results_df.to_csv(path_or_buf='election_results.csv', index=False)

In [16]:
# Display the per-election results table.
election_results_df

Unnamed: 0,winner,rounds,election,firstRoundVotes,secondRoundVotes,thirdRoundVotes,finalRoundVotes,election_id,fourthRoundVotes
0,candidate_1,4,ROUND 1\nCandidate Votes Status\n-------...,9129,10903.0,14070.0,20697,election_05-04-2020_08-40-23_5cands_16noise.csv,
1,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,13918,16901.0,,22934,election_04-30-2020_21-00-03_4_0.0225.csv,
2,candidate_4,4,ROUND 1\nCandidate Votes Status\n-------...,7502,9065.0,11522.0,17361,election_05-02-2020_08-18-17_5cands_3noise.csv,
3,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,3400,4475.0,,5756,election_05-04-2020_08-38-34_4cands_12noise.csv,
4,candidate_2,3,ROUND 1\nCandidate Votes Status\n-------...,11236,15597.0,,24970,election_04-30-2020_20-59-34_4_0.0375.csv,
5,candidate_4,3,ROUND 1\nCandidate Votes Status\n-------...,2637,3052.0,,4765,election_04-30-2020_21-06-11_4_0.0125.csv,
6,candidate_3,2,ROUND 1\nCandidate Votes Status\n-------...,8199,,,9852,election_04-30-2020_21-08-19_3_0.0133333333333...,
7,candidate_3,3,ROUND 1\nCandidate Votes Status\n-------...,8907,11449.0,,17242,election_05-04-2020_08-40-33_4cands_10noise.csv,
8,candidate_2,2,ROUND 1\nCandidate Votes Status\n-------...,2034,,,3091,election_05-02-2020_08-18-31_3cands_18noise.csv,
9,candidate_3,5,ROUND 1\nCandidate Votes Status\n-------...,4260,5030.0,6496.0,12653,election_04-30-2020_21-04-28_6_0.0066666666666...,8630.0


In [20]:
# Rounds an election never reached are NaN; treat them as 0 votes.
election_results_df = election_results_df.fillna(0)

In [21]:
# `df` now refers to the same object as election_results_df (not a copy), so
# later in-place changes to `df` also change election_results_df. This rebinds
# the name previously used for the ballots frame.
df = election_results_df 

In [22]:
# Sum of the winner's vote counts over every recorded round.
round_vote_columns = [
    'firstRoundVotes',
    'secondRoundVotes',
    'thirdRoundVotes',
    'fourthRoundVotes',
    'finalRoundVotes',
]
df['TotalVotesWinner'] = sum(df[column] for column in round_vote_columns)


In [24]:
# Remove the per-round columns now that the total exists. This is done in
# place, so it raises KeyError if the cell is run a second time.
df.drop(columns=['firstRoundVotes','secondRoundVotes','thirdRoundVotes','finalRoundVotes','fourthRoundVotes'],inplace=True)

In [25]:
# Preview the first rows of the summarised results.
df.head()

Unnamed: 0,winner,rounds,election,election_id,TotalVotesWinner
0,candidate_1,4,ROUND 1\nCandidate Votes Status\n-------...,election_05-04-2020_08-40-23_5cands_16noise.csv,54799.0
1,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,election_04-30-2020_21-00-03_4_0.0225.csv,53753.0
2,candidate_4,4,ROUND 1\nCandidate Votes Status\n-------...,election_05-02-2020_08-18-17_5cands_3noise.csv,45450.0
3,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,election_05-04-2020_08-38-34_4cands_12noise.csv,13631.0
4,candidate_2,3,ROUND 1\nCandidate Votes Status\n-------...,election_04-30-2020_20-59-34_4_0.0375.csv,51803.0
