In [6]:
import pyrankvote
import ast
import pandas as pd
from pyrankvote import Candidate, Ballot


def get_cand_list(df):
    list_of_unique_vals_in_dataframe = df.iloc[:, 1].value_counts().index
    cand_list = list(filter(lambda x: x != '0', list_of_unique_vals_in_dataframe))
    return cand_list

def initialize_cand_objs(cand_list):
    if isinstance(cand_list, str):
        return [Candidate(c) for c in cand_list.split(', ')]
    else:
        return [Candidate(c) for c in cand_list]

def initialize_cand_objs_in_df_cols(df):
    cands_df = df.iloc[:, 1:]  # getting all columns with candidate names in them
    cands_df = cands_df.applymap(lambda x: create_cand(x))  # making them into pyrankvote candidate objects
    cands_df['ballot_id'] = df.iloc[:, 0]  # stitching df back with ballot_ids from old dataframe
    return cands_df

def get_cands_into_single_cell(df):
    df['candidate_list'] = df.iloc[:, 1:].agg(", ".join, axis=1) #this excludes the first candidates??
    return df

def initialize_ballot_objs(df):
    ballot_objects = []
    for index, value in enumerate(df['candidate_list']):
        ballot = Ballot(ranked_candidates=value)
        ballot_objects.append(ballot)
    return ballot_objects

def run_election(list_of_cand_objs, election_df):
    return pyrankvote.instant_runoff_voting(list_of_cand_objs, election_df['ballots'], pick_random_if_blank=True)

def rm_invalid_rows(df):
    return df[df['candidate_list'] != '0']


def main(df):
    
    # creating dictionary with including winners, rounds and election number:
    election_dict = {}
    
    #renaming column to ballot_id
    df.rename(columns={'Unnamed: 0': 'ballot_id'}, inplace=True)
    
    #droping nans, 
    df = df.dropna(axis=1, how='all')
    df = df.drop(['ballot_id','filename'], axis=1)
    print('df.shape',df.shape)
        
    #aggregating all cand into one cell
    df = get_cands_into_single_cell(df)
    
    #Removing 0 with empty space:
    df['candidate_list'] = df['candidate_list'].apply(lambda x: x.replace('0, ', '').replace(', 0', ''))
    
    #Removing invalid rows:
    df = rm_invalid_rows(df)
    
    #for all srings making into pyrank candidate objects:
    df['candidate_list'] = df['candidate_list'].apply(lambda x: initialize_cand_objs(x))

    #creating ballot objects column for each candidate in candidate list:
    ballots = initialize_ballot_objs(df)
    df['ballots'] = ballots

    #getting unique list of candidates:
    cand_list = get_cand_list(df)
    cand_list = initialize_cand_objs(cand_list)
    print("cand_list: ",cand_list)
    
    #running pyrank election and getting winners:
    election = run_election(cand_list, df)
    winner = election.get_winners()
    
    #Dictionary keys and values *************************
    #printing winners and add dictionary key:
    print("get_winners",winner)
    election_dict['winner'] = winner
    
    # printing rounds and add dictionary key:
    print("rounds",len(election.rounds))
    election_dict['rounds'] = len(election.rounds)
    
    #print elections and add dictionary key:
    print("election:", election)
    election_dict['election'] = election
    
    #will return df and dictionary for us to read votes
    return df, election_dict
    


def read_votes(file, election_dict):
    with open(file, 'rt') as f:
        for line in f:
                line = f.readline()
                if not line: 
                    break
                strip_line = line.strip()
                    
                if "Elected" in strip_line and winner in strip_line:
                    print('*',strip_line,'*')
                    if 'WinnerTotalVotes' not in election_dict:
                        election_dict['WinnerTotalVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
#                 if "candidate_1" in strip_line:
#                     election_dict['candidate1_totalvotes'] = sum([int(s) for s in strip_line.split() if s.isdigit()].pop())
                
            
                        
                
#     f.close() 


# import re

# def read_votes(file, election_dict):
#     linenum = 0
#     get_winner = "get_winners"
#     with open(file, 'rt') as f:
#         for line in f:
#             linenum +=1
#             if line.lower().find(get_winner) != -1:
#                 election_dict['Winner'] = [int(s) for s in line.split() if s.isdigit()].pop()


#     file.close() 


# def read_votes(file, election_dict):
    
#     lines = []
    
#     with open(file, 'rt') as f:
#         for line in f:
#             for lines in file:
#                 lines.append(line.rstrip("\n"))
                
#     index = 0
#     prev = 0
#     string = lines
#     subsstr = "Hopeful"
#     while index < len(str):
#         index = string.find(substr, index)
#         if index == -1:
#             break
#         print(" " * (index -prev))
#         prev = index + len(substr)
#         index += len(substr)
        
#     election_dict['WinnerTotalVotes'] = 



In [7]:
df = pd.read_csv('master_elections.csv')
df.rename(columns={'Unnamed: 0': 'ballot_id'}, inplace=True)
df = df[(df['filename']!="election_05-04-2020_08-39-40_5cands_12noise.csv") | (df['ballot_id']!=2767)]
df = df[(df['filename']!="election_04-30-2020_21-07-57_4_0.01.csv") | (df['ballot_id']!=20726)]
df = df[(df['filename']!="election_05-04-2020_08-40-23_5cands_16noise.csv") | (df['ballot_id']!=41363)]
master_df = df.copy(deep=True)

In [8]:
election_ids = df.filename.unique()

In [9]:
#Building dictionary that consists of election and election_num in the election_id array:

election_num_dict = {}
for election_num, election in enumerate(election_ids):
    election_num_dict[election_num] = election
#     print(election_num, election)

In [10]:
#building dictionary
all_elect_dict={}


#Enumerating through the election and election_num in the election_id array and applying main function while pringting:
for election_num, election in enumerate(election_ids):
    
    print(election_num, election)
    print("***********Election: ", election_num)
    df_election = df.loc[df['filename']==election]
  

    temp_df,election_dict = main(df_election)
    print(election_dict)
    
    if len(election_dict['winner'])==1:
        winner = election_dict['winner'][0].name
        election_dict['winner'] = winner
    else:
        print(election, "has more than one winner")
    result_file = 'out.txt'
    
    with open(result_file, 'w') as file1:
        print(election_dict['election'], file=file1)
    read_votes(result_file, election_dict)
    all_elect_dict[election_num] = election_dict


0 election_05-04-2020_08-40-23_5cands_16noise.csv
***********Election:  0
df.shape (41363, 5)
cand_list:  [<Candidate('candidate_1')>, <Candidate('candidate_5')>, <Candidate('candidate_2')>, <Candidate('candidate_3')>, <Candidate('candidate_4')>]
get_winners [<Candidate('candidate_1')>]
rounds 4
election: ROUND 1
Candidate      Votes  Status
-----------  -------  --------
candidate_1     9129  Hopeful
candidate_5     9010  Hopeful
candidate_2     8455  Hopeful
candidate_3     7911  Hopeful
candidate_4     6858  Rejected

ROUND 2
Candidate      Votes  Status
-----------  -------  --------
candidate_1    10902  Hopeful
candidate_5    10641  Hopeful
candidate_3    10078  Hopeful
candidate_2     9742  Rejected
candidate_4        0  Rejected

ROUND 3
Candidate      Votes  Status
-----------  -------  --------
candidate_1    14068  Hopeful
candidate_3    13675  Hopeful
candidate_5    13620  Rejected
candidate_2        0  Rejected
candidate_4        0  Rejected

FINAL RESULT
Candidate      Vo

In [11]:
all_elect_dict

{0: {'winner': 'candidate_1',
  'rounds': 4,
  'election': <ElectionResults(4 rounds)>},
 1: {'winner': 'candidate_1',
  'rounds': 3,
  'election': <ElectionResults(3 rounds)>,
  'WinnerTotalVotes': 22970},
 2: {'winner': 'candidate_4',
  'rounds': 4,
  'election': <ElectionResults(4 rounds)>},
 3: {'winner': 'candidate_1',
  'rounds': 3,
  'election': <ElectionResults(3 rounds)>,
  'WinnerTotalVotes': 5757},
 4: {'winner': 'candidate_2',
  'rounds': 3,
  'election': <ElectionResults(3 rounds)>,
  'WinnerTotalVotes': 24976},
 5: {'winner': 'candidate_4',
  'rounds': 3,
  'election': <ElectionResults(3 rounds)>,
  'WinnerTotalVotes': 4774},
 6: {'winner': 'candidate_3',
  'rounds': 2,
  'election': <ElectionResults(2 rounds)>},
 7: {'winner': 'candidate_3',
  'rounds': 3,
  'election': <ElectionResults(3 rounds)>,
  'WinnerTotalVotes': 17216},
 8: {'winner': 'candidate_2',
  'rounds': 2,
  'election': <ElectionResults(2 rounds)>},
 9: {'winner': 'candidate_3',
  'rounds': 5,
  'election

In [12]:
election_results_df=pd.DataFrame()
election_results_list=[]

In [13]:
for key,val in all_elect_dict.items():
    val['election_id']=election_num_dict[key]
    print(val)

    election_results_df.append(val, ignore_index=True)

{'winner': 'candidate_1', 'rounds': 4, 'election': <ElectionResults(4 rounds)>, 'election_id': 'election_05-04-2020_08-40-23_5cands_16noise.csv'}
{'winner': 'candidate_1', 'rounds': 3, 'election': <ElectionResults(3 rounds)>, 'WinnerTotalVotes': 22970, 'election_id': 'election_04-30-2020_21-00-03_4_0.0225.csv'}
{'winner': 'candidate_4', 'rounds': 4, 'election': <ElectionResults(4 rounds)>, 'election_id': 'election_05-02-2020_08-18-17_5cands_3noise.csv'}
{'winner': 'candidate_1', 'rounds': 3, 'election': <ElectionResults(3 rounds)>, 'WinnerTotalVotes': 5757, 'election_id': 'election_05-04-2020_08-38-34_4cands_12noise.csv'}
{'winner': 'candidate_2', 'rounds': 3, 'election': <ElectionResults(3 rounds)>, 'WinnerTotalVotes': 24976, 'election_id': 'election_04-30-2020_20-59-34_4_0.0375.csv'}
{'winner': 'candidate_4', 'rounds': 3, 'election': <ElectionResults(3 rounds)>, 'WinnerTotalVotes': 4774, 'election_id': 'election_04-30-2020_21-06-11_4_0.0125.csv'}
{'winner': 'candidate_3', 'rounds': 2

In [130]:
election_results_df = pd.DataFrame(election_results_list)
election_results_df.to_csv('election_results.csv', index=False)

In [131]:
election_results_df.head()