In [1]:
!pip install pyrankvote



In [1]:
import pyrankvote
import ast
import pandas as pd
from pyrankvote import Candidate, Ballot


def rename_index_col_to_ballot_id(df):
    df.rename(columns={'Unnamed: 0': 'ballot_id'}, inplace=True)
    return df

def get_cand_list(df):
    list_of_unique_vals_in_dataframe = df.iloc[:, 1].value_counts().index
    cand_list = list(filter(lambda x: x != '0', list_of_unique_vals_in_dataframe))
    return cand_list

def initialize_cand_objs(cand_list):
    if isinstance(cand_list, str):
        return [Candidate(c) for c in cand_list.split(', ')]
    else:
        return [Candidate(c) for c in cand_list]

def initialize_cand_objs_in_df_cols(df):
    cands_df = df.iloc[:, 1:]  # getting all columns with candidate names in them
    cands_df = cands_df.applymap(lambda x: create_cand(x))  # making them into pyrankvote candidate objects
    cands_df['ballot_id'] = df.iloc[:, 0]  # stitching df back with ballot_ids from old dataframe
    return cands_df

def get_cands_into_single_cell(df):
    df['candidate_list'] = df.iloc[:, 1:].agg(", ".join, axis=1)
    return df

def initialize_ballot_objs(df):
    ballot_objects = []
    for index, value in enumerate(df['candidate_list']):
        ballot = Ballot(ranked_candidates=value)
        ballot_objects.append(ballot)
    return ballot_objects

def run_election(list_of_cand_objs, election_df):
    return pyrankvote.instant_runoff_voting(list_of_cand_objs, election_df['ballots'], pick_random_if_blank=True)

def rm_invalid_rows(df):
    return df[df['candidate_list'] != '0']

def run_all_steps(df):
    election_dict = {}
    
    df = df.dropna(axis=1, how='all')
    df = df.drop(['ballot_id','filename'], axis=1)
    print('df.shape',df.shape)
    df = get_cands_into_single_cell(df)

    df['candidate_list'] = df['candidate_list'].apply(lambda x: x.replace('0, ', '').replace(', 0', ''))
    df = rm_invalid_rows(df)
    df['candidate_list'] = df['candidate_list'].apply(lambda x: initialize_cand_objs(x))

    ballots = initialize_ballot_objs(df)
    df['ballots'] = ballots
    

    cand_list = get_cand_list(df)
    cand_list = initialize_cand_objs(cand_list)
    print("cand_list: ",cand_list)
#     print(df.head())
    election = run_election(cand_list, df)
    winner = election.get_winners()
    print("get_winners",winner)
    election_dict['winner'] = winner
    #df["irv_winner"] = winner
    print("rounds",len(election.rounds))
    election_dict['rounds'] = len(election.rounds)

#     open file and write results. read winners votes from file and write to df
    #print("register_round_results",election.register_round_results(election.rounds[-1]))

    print("election:", election)
    election_dict['election'] = election

#     print("election type:", type(election))
    election_dict['register']=election.register_round_results(election.rounds[-1])

    #print('hi')
    return df, election_dict




def read_votes(file, election_dict):
    with open(file, 'r') as file1:
        count = 0
        while True: 
            count += 1

            # Get next line from file 
            line = file1.readline()
            if not line: 
                break
            strip_line = line.strip()
    #         print('*',strip_line,'*')


            if 'ROUND' in strip_line or 'FINAL' in strip_line:
                start_round = True
                if 'ROUND 1' in strip_line or 'FINAL' in strip_line:
                    monitor_round=True
                    print('*',strip_line,'*')
                else:
                    monitor_round=False
            if 'ROUND' not in strip_line and  'FINAL' not in strip_line:
                start_round = False
                if winner in line and monitor_round:
                    print('*',strip_line,'*')
                    if 'firstRoundVotes' not in election_dict:
                        election_dict['firstRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                    else:
                        election_dict['finalRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                    
    file1.close() 
    


In [2]:
df = pd.read_csv('master_elections.csv')
df = rename_index_col_to_ballot_id(df)
# df['irv_winner']=""
df = df[(df['filename']!="election_05-04-2020_08-39-40_5cands_12noise.csv") | (df['ballot_id']!=2767)]
df = df[(df['filename']!="election_04-30-2020_21-07-57_4_0.01.csv") | (df['ballot_id']!=20726)]
df = df[(df['filename']!="election_05-04-2020_08-40-23_5cands_16noise.csv") | (df['ballot_id']!=41363)]
master_df = df.copy(deep=True)

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
pd.set_option('max_colwidth', 8000)
pd.options.display.max_columns = None
pd.options.display.float_format = '{:.4f}'.format
pd.options.display.max_rows = 10000

In [4]:
election_ids= df['filename'].unique()
#election_ids

In [5]:
all_elections_dict = {}

In [6]:
df.head()

Unnamed: 0,ballot_id,candidate_1,candidate_2,candidate_3,candidate_4,candidate_5,candidate_6,filename
0,0,candidate_1,candidate_2,candidate_3,candidate_4,candidate_5,,election_05-04-2020_08-40-23_5cands_16noise.csv
1,1,candidate_1,candidate_2,candidate_3,candidate_4,candidate_5,,election_05-04-2020_08-40-23_5cands_16noise.csv
2,2,candidate_1,candidate_2,candidate_3,candidate_4,candidate_5,,election_05-04-2020_08-40-23_5cands_16noise.csv
3,3,candidate_1,candidate_2,candidate_3,0,0,,election_05-04-2020_08-40-23_5cands_16noise.csv
4,4,candidate_1,candidate_2,candidate_3,candidate_4,candidate_5,,election_05-04-2020_08-40-23_5cands_16noise.csv


In [7]:
election_num_dict = {}
for election_num, election in enumerate(election_ids):
    election_num_dict[election_num] = election
    print(election_num, election)

0 election_05-04-2020_08-40-23_5cands_16noise.csv
1 election_04-30-2020_21-00-03_4_0.0225.csv
2 election_05-02-2020_08-18-17_5cands_3noise.csv
3 election_05-04-2020_08-38-34_4cands_12noise.csv
4 election_04-30-2020_20-59-34_4_0.0375.csv
5 election_04-30-2020_21-06-11_4_0.0125.csv
6 election_04-30-2020_21-08-19_3_0.013333333333333334.csv
7 election_05-04-2020_08-40-33_4cands_10noise.csv
8 election_05-02-2020_08-18-31_3cands_18noise.csv
9 election_04-30-2020_21-04-28_6_0.006666666666666667.csv
10 election_04-30-2020_21-08-00_4_0.0275.csv
11 election_05-04-2020_08-39-40_5cands_12noise.csv
12 election_04-30-2020_21-07-57_4_0.01.csv
13 election_04-30-2020_21-08-54_3_0.02.csv
14 election_05-02-2020_08-19-42_3cands_2noise.csv
15 election_05-04-2020_08-40-11_5cands_8noise.csv
16 election_05-04-2020_08-40-56_5cands_17noise.csv
17 election_05-02-2020_08-17-43_3cands_11noise.csv
18 election_05-02-2020_08-19-53_3cands_16noise.csv
19 election_05-04-2020_08-41-05_4cands_3noise.csv
20 election_05-02-

Check:
Election 11,12 failed

In [8]:
for election_num, election in enumerate(election_ids):
#     election_dict[election]=
    print(election_num, election)
#     if election_num ==11:
    print("***********Election: ", election_num)
    df_election = df.loc[df['filename']==election]
    #print(df_election.shape)

    temp_df,election_dict = run_all_steps(df_election)
    print(election_dict)
    if len(election_dict['winner'])==1:
        winner = election_dict['winner'][0].name
        election_dict['winner'] = winner
    else:
        print(election, "has more than one winner")
    result_file = 'out.txt'
    with open(result_file, 'w') as file1:
        print(election_dict['election'], file=file1)
    read_votes(result_file, election_dict)
    all_elections_dict[election_num] = election_dict


0 election_05-04-2020_08-40-23_5cands_16noise.csv
***********Election:  0
df.shape (41363, 5)
cand_list:  [<Candidate('candidate_1')>, <Candidate('candidate_5')>, <Candidate('candidate_2')>, <Candidate('candidate_3')>, <Candidate('candidate_4')>]
get_winners [<Candidate('candidate_1')>]
rounds 4
election: ROUND 1
Candidate      Votes  Status
-----------  -------  --------
candidate_1     9129  Hopeful
candidate_5     9010  Hopeful
candidate_2     8455  Hopeful
candidate_3     7911  Hopeful
candidate_4     6858  Rejected

ROUND 2
Candidate      Votes  Status
-----------  -------  --------
candidate_1    10903  Hopeful
candidate_5    10640  Hopeful
candidate_3    10078  Hopeful
candidate_2     9742  Rejected
candidate_4        0  Rejected

ROUND 3
Candidate      Votes  Status
-----------  -------  --------
candidate_1    14069  Hopeful
candidate_3    13675  Hopeful
candidate_5    13619  Rejected
candidate_2        0  Rejected
candidate_4        0  Rejected

FINAL RESULT
Candidate      Vo

In [9]:
all_elections_dict

{0: {'winner': 'candidate_1',
  'rounds': 4,
  'election': <ElectionResults(5 rounds)>,
  'register': None,
  'firstRoundVotes': 9129,
  'finalRoundVotes': 20717},
 1: {'winner': 'candidate_1',
  'rounds': 3,
  'election': <ElectionResults(4 rounds)>,
  'register': None,
  'firstRoundVotes': 13918,
  'finalRoundVotes': 22957},
 2: {'winner': 'candidate_4',
  'rounds': 4,
  'election': <ElectionResults(5 rounds)>,
  'register': None,
  'firstRoundVotes': 7502,
  'finalRoundVotes': 17366},
 3: {'winner': 'candidate_1',
  'rounds': 3,
  'election': <ElectionResults(4 rounds)>,
  'register': None,
  'firstRoundVotes': 3400,
  'finalRoundVotes': 5749},
 4: {'winner': 'candidate_2',
  'rounds': 3,
  'election': <ElectionResults(4 rounds)>,
  'register': None,
  'firstRoundVotes': 11236,
  'finalRoundVotes': 25001},
 5: {'winner': 'candidate_4',
  'rounds': 3,
  'election': <ElectionResults(4 rounds)>,
  'register': None,
  'firstRoundVotes': 2637,
  'finalRoundVotes': 4774},
 6: {'winner': '

In [12]:
election_results_df=pd.DataFrame()
election_results_list=[]

In [13]:
for key,val in all_elections_dict.items():
    val['election_id']=election_num_dict[key]
    del val['register']
    del val['election']
    print(val)

    election_results_list.append(val)
    

{'winner': 'candidate_1', 'rounds': 4, 'firstRoundVotes': 9129, 'finalRoundVotes': 20693, 'election_id': 'election_05-04-2020_08-40-23_5cands_16noise.csv'}
{'winner': 'candidate_1', 'rounds': 3, 'firstRoundVotes': 13918, 'finalRoundVotes': 22931, 'election_id': 'election_04-30-2020_21-00-03_4_0.0225.csv'}
{'winner': 'candidate_4', 'rounds': 4, 'firstRoundVotes': 7502, 'finalRoundVotes': 17359, 'election_id': 'election_05-02-2020_08-18-17_5cands_3noise.csv'}
{'winner': 'candidate_1', 'rounds': 3, 'firstRoundVotes': 3400, 'finalRoundVotes': 5760, 'election_id': 'election_05-04-2020_08-38-34_4cands_12noise.csv'}
{'winner': 'candidate_2', 'rounds': 3, 'firstRoundVotes': 11236, 'finalRoundVotes': 24984, 'election_id': 'election_04-30-2020_20-59-34_4_0.0375.csv'}
{'winner': 'candidate_4', 'rounds': 3, 'firstRoundVotes': 2637, 'finalRoundVotes': 4770, 'election_id': 'election_04-30-2020_21-06-11_4_0.0125.csv'}
{'winner': 'candidate_3', 'rounds': 2, 'firstRoundVotes': 8199, 'finalRoundVotes': 

In [14]:
election_results_df = pd.DataFrame(election_results_list)
election_results_df.to_csv('election_results.csv', index=False)

In [15]:
election_results_df

Unnamed: 0,winner,rounds,firstRoundVotes,finalRoundVotes,election_id
0,candidate_1,4,9129,20693,election_05-04-2020_08-40-23_5cands_16noise.csv
1,candidate_1,3,13918,22931,election_04-30-2020_21-00-03_4_0.0225.csv
2,candidate_4,4,7502,17359,election_05-02-2020_08-18-17_5cands_3noise.csv
3,candidate_1,3,3400,5760,election_05-04-2020_08-38-34_4cands_12noise.csv
4,candidate_2,3,11236,24984,election_04-30-2020_20-59-34_4_0.0375.csv
5,candidate_4,3,2637,4770,election_04-30-2020_21-06-11_4_0.0125.csv
6,candidate_3,2,8199,9857,election_04-30-2020_21-08-19_3_0.013333333333333334.csv
7,candidate_3,3,8907,17220,election_05-04-2020_08-40-33_4cands_10noise.csv
8,candidate_2,2,2034,3085,election_05-02-2020_08-18-31_3cands_18noise.csv
9,candidate_3,5,4260,12651,election_04-30-2020_21-04-28_6_0.006666666666666667.csv
