In [1]:
# !pip install pyrankvote
# !pip install pandas

In [2]:
import pyrankvote
import ast
import pandas as pd
from pyrankvote import Candidate, Ballot


def get_cand_list(df):
    list_of_unique_vals_in_dataframe = df.iloc[:, 1].value_counts().index
    cand_list = list(filter(lambda x: x != '0', list_of_unique_vals_in_dataframe))
    return cand_list

def initialize_cand_objs(cand_list):
    if isinstance(cand_list, str):
        return [Candidate(c) for c in cand_list.split(', ')]
    else:
        return [Candidate(c) for c in cand_list]

def initialize_cand_objs_in_df_cols(df):
    cands_df = df.iloc[:, 1:]  # getting all columns with candidate names in them
    cands_df = cands_df.applymap(lambda x: create_cand(x))  # making them into pyrankvote candidate objects
    cands_df['ballot_id'] = df.iloc[:, 0]  # stitching df back with ballot_ids from old dataframe
    return cands_df

def get_cands_into_single_cell(df):
    df['candidate_list'] = df.iloc[:, 1:].agg(", ".join, axis=1) #this excludes the first candidates??
    return df

def initialize_ballot_objs(df):
    ballot_objects = []
    for index, value in enumerate(df['candidate_list']):
        ballot = Ballot(ranked_candidates=value)
        ballot_objects.append(ballot)
    return ballot_objects

def run_election(list_of_cand_objs, election_df):
    return pyrankvote.instant_runoff_voting(list_of_cand_objs, election_df['ballots'], pick_random_if_blank=True)

def rm_invalid_rows(df):
    return df[df['candidate_list'] != '0']


def main(df):
    
    # creating dictionary with including winners, rounds and election number:
    election_dict = {}
    
    #renaming column to ballot_id
    df.rename(columns={'Unnamed: 0': 'ballot_id'}, inplace=True)
    
    #droping nans, 
    df = df.dropna(axis=1, how='all')
    df = df.drop(['ballot_id','filename'], axis=1)
    print('df.shape',df.shape)
        
    #aggregating all cand into one cell
    df = get_cands_into_single_cell(df)
    
    #Removing 0 with empty space:
    df['candidate_list'] = df['candidate_list'].apply(lambda x: x.replace('0, ', '').replace(', 0', ''))
    
    #Removing invalid rows:
    df = rm_invalid_rows(df)
    
    #for all srings making into pyrank candidate objects:
    df['candidate_list'] = df['candidate_list'].apply(lambda x: initialize_cand_objs(x))

    #creating ballot objects column for each candidate in candidate list:
    ballots = initialize_ballot_objs(df)
    df['ballots'] = ballots

    #getting unique list of candidates:
    cand_list = get_cand_list(df)
    cand_list = initialize_cand_objs(cand_list)
    print("cand_list: ",cand_list)
    
    #running pyrank election and getting winners:
    election = run_election(cand_list, df)
    winner = election.get_winners()
    
    #Dictionary keys and values *************************
    #printing winners and add dictionary key:
    print("get_winners",winner)
    election_dict['winner'] = winner
    
    # printing rounds and add dictionary key:
    print("rounds",len(election.rounds))
    election_dict['rounds'] = len(election.rounds)
    
    #print elections and add dictionary key:
    print("election:", election)
    election_dict['election'] = election

        
    #will return df and dictionary for us to read votes
    return df, election_dict
    

In [3]:
df = pd.read_csv('master_elections.csv')
df.rename(columns={'Unnamed: 0': 'ballot_id'}, inplace=True)
df = df[(df['filename']!="election_05-04-2020_08-39-40_5cands_12noise.csv") | (df['ballot_id']!=2767)]
df = df[(df['filename']!="election_04-30-2020_21-07-57_4_0.01.csv") | (df['ballot_id']!=20726)]
df = df[(df['filename']!="election_05-04-2020_08-40-23_5cands_16noise.csv") | (df['ballot_id']!=41363)]
master_df = df.copy(deep=True)


election_ids = df.filename.unique()

election_num_dict = {}
for election_num, election in enumerate(election_ids):
    election_num_dict[election_num] = election

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [4]:
# import re

# def read_votes(file, election_dict):
#     found = False
#     with open(file, 'r') as file1:
#         for line in file1:
#             if not found:
#                 counter = 0
#                 for strip_line in line.split():
#                     counter = counter+1
#                     if 'ROUND' in strip_line or "FINAL" in strip_line:
                        
# #                         if "ROUND 1" in strip_line:
# #                             monitor_round1 = True
# #                         else:
# #                             monitor_round1 = False
# #                     if 'ROUND' not in line and 'FINAL' not in strip_line:
# #                         start_round = False    
#                         if 'candidate_1' in line:
#                             if 'can1firstRoundVotes' not in election_dict:
#                                 election_dict['can1firstRoundVotes']=[line.split()[counter]].pop()
#                                 found=True
# #                             elif 'candidate_2' in strip_line:
# #                                 if 'can2firstRoundVotes' not in election_dict:
# #                                     election_dict['can2firstRoundVotes']=[line.split()[counter]].pop()
# #                                     found=True
# #                             elif 'candidate_3' in strip_line:
# #                                 if 'can3firstRoundVotes' not in election_dict:
# #                                     election_dict['can3firstRoundVotes']=[line.split()[counter]].pop()
# #                                     found=True
# #                             elif 'candidate_4' in strip_line:
# #                                 if 'can4firstRoundVotes' not in election_dict:
# #                                     election_dict['can4firstRoundVotes']=[line.split()[counter]].pop()
# #                                     found=True
# #                             else:
# #                                 break


#         file1.close()  

        
# found = False
# with open("input.txt") as openfile:
#      for line in openfile:
#          if not found:
#              counter = 0
#              for part in line.split():
#                   counter = counter + 1
#                   if "ppl=" in part:
#                       print part
#                       print line.split()[counter]
#                       found = True

In [5]:
def read_votes(file, election_dict):
    with open(file, 'r') as file1:
        count = 0
        while True: 
            count += 1

            # Get next line from file 
            line = file1.readline()
            if not line: 
                break
            strip_line = line.strip()
            
            if 'ROUND' in strip_line or "FINAL" in strip_line:
                start_round = True
                if "ROUND 1" in strip_line:
                    monitor_round1 = True
                else:
                    monitor_round1 = False
                if "ROUND 2" in strip_line:
                    monitor_round2 = True
                else:
                    monitor_round2 = False
                if "ROUND 3" in strip_line:
                    monitor_round3 = True
                else:
                    monitor_round3 = False
                if "ROUND 4" in strip_line:
                    monitor_round4 = True
                else:
                    monitor_round4 = False
                if "FINAL" in strip_line:
                    final_round =True
                else:
                    final_round=False
                
            if 'ROUND' not in strip_line and 'FINAL' not in strip_line:
                start_round = False
                
#                 if winner in line and monitor_round1:
#                     if 'can1firstRoundVotes' not in election_dict:
#                         election_dict['can1firstRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                
                if 'candidate_1' in line and monitor_round1:
                    if 'can1firstRoundVotes' not in election_dict:
                        election_dict['can1firstRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if 'candidate_1' in line and monitor_round2:
                    if 'can1secondRoundVotes' not in election_dict:
                        election_dict['can1secondRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if 'candidate_1' in line and monitor_round3:
                    if 'can1thirdRoundVotes' not in election_dict:
                        election_dict['can1thirdRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if 'candidate_1' in line and monitor_round4:
                    if 'can1fourthRoundVotes' not in election_dict:
                        election_dict['can1fourthRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()  
                if 'candidate_1' in line and final_round:
                    if 'can1finalRoundVotes' not in election_dict:
                        election_dict['can1finalRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()      
                        
                if 'candidate_2' in line and monitor_round1:
                    if 'can2firstRoundVotes' not in election_dict:
                        election_dict['can2firstRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if 'candidate_2' in line and monitor_round2:
                    if 'can2secondRoundVotes' not in election_dict:
                        election_dict['can2secondRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if 'candidate_2' in line and monitor_round3:
                    if 'can2thirdRoundVotes' not in election_dict:
                        election_dict['can2thirdRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if 'candidate_2' in line and monitor_round4:
                    if 'can2fourthRoundVotes' not in election_dict:
                        election_dict['can2fourthRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()  
                if 'candidate_2' in line and final_round:
                    if 'can2finalRoundVotes' not in election_dict:
                        election_dict['can2finalRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()    
                
                if 'candidate_3' in line and monitor_round1:
                    if 'can3firstRoundVotes' not in election_dict:
                        election_dict['can3firstRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if 'candidate_3' in line and monitor_round2:
                    if 'can3secondRoundVotes' not in election_dict:
                        election_dict['can3secondRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if 'candidate_3' in line and monitor_round3:
                    if 'can3thirdRoundVotes' not in election_dict:
                        election_dict['can3thirdRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if 'candidate_3' in line and monitor_round4:
                    if 'can3fourthRoundVotes' not in election_dict:
                        election_dict['can3fourthRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()  
                if 'candidate_3' in line and final_round:
                    if 'can3finalRoundVotes' not in election_dict:
                        election_dict['can3finalRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()    
                
                if 'candidate_4' in line and monitor_round1:
                    if 'can4firstRoundVotes' not in election_dict:
                        election_dict['can4firstRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if 'candidate_4' in line and monitor_round2:
                    if 'can4secondRoundVotes' not in election_dict:
                        election_dict['can4secondRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if 'candidate_4' in line and monitor_round3:
                    if 'can4thirdRoundVotes' not in election_dict:
                        election_dict['can4thirdRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if 'candidate_4' in line and monitor_round4:
                    if 'can4fourthRoundVotes' not in election_dict:
                        election_dict['can4fourthRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()  
                if 'candidate_4' in line and final_round:
                    if 'can4finalRoundVotes' not in election_dict:
                        election_dict['can4finalRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()    
                    
                #Winner round: 
                if winner in line and monitor_round1:
                    if 'winnerfirstRoundVotes' not in election_dict:
                        election_dict['winnerfirstRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if winner in line and monitor_round2:
                    if 'winnersecondRoundVotes' not in election_dict:
                        election_dict['winnersecondRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if winner in line and monitor_round3:
                    if 'winnerthirdRoundVotes' not in election_dict:
                        election_dict['winnerthirdRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()
                if winner in line and monitor_round4:
                    if 'winnerfourthRoundVotes' not in election_dict:
                        election_dict['winnerfourthRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()  
                if winner in line and final_round:
                    if 'winnerfinalRoundVotes' not in election_dict:
                        election_dict['winnerfinalRoundVotes']=[int(s) for s in strip_line.split() if s.isdigit()].pop()    
                    
                    
                
                    
                    
                    
                    
        file1.close()               
                    
                    
            
    
# rounds = ['ROUND 1', 'ROUND 2', 'ROUND 3',.....,'FINAL ROUND']
# for i in range (len (rounds)):
#     print("round {}.{}".format(i + 1, rounds[i]))


In [6]:
#building dictionary
all_elect_dict={}


#Enumerating through the election and election_num in the election_id array and applying main function while printing:
for election_num, election in enumerate(election_ids):
    
    print(election_num, election,' \n ')
    print("***********Election: ", election_num)
    df_election = df.loc[df['filename']==election]
    
 
    temp_df,election_dict = main(df_election)
    print(election_dict)

    
    if len(election_dict['winner'])==1:
        winner = election_dict['winner'][0].name
        election_dict['winner'] = winner
    else:
        print(election, "has more than one winner")
    result_file = 'out.txt'
    
    with open(result_file, 'w') as file1:
        print(election_dict['election'], file=file1)
    read_votes(result_file, election_dict) #<<<==== where we actually call read_votes
    all_elect_dict[election_num] = election_dict
    

0 election_05-04-2020_08-40-23_5cands_16noise.csv  
 
***********Election:  0
df.shape (41363, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


cand_list:  [<Candidate('candidate_1')>, <Candidate('candidate_5')>, <Candidate('candidate_2')>, <Candidate('candidate_3')>, <Candidate('candidate_4')>]
get_winners [<Candidate('candidate_1')>]
rounds 4
election: ROUND 1
Candidate      Votes  Status
-----------  -------  --------
candidate_1     9129  Hopeful
candidate_5     9010  Hopeful
candidate_2     8455  Hopeful
candidate_3     7911  Hopeful
candidate_4     6858  Rejected

ROUND 2
Candidate      Votes  Status
-----------  -------  --------
candidate_1    10902  Hopeful
candidate_5    10640  Hopeful
candidate_3    10078  Hopeful
candidate_2     9743  Rejected
candidate_4        0  Rejected

ROUND 3
Candidate      Votes  Status
-----------  -------  --------
candidate_1    14069  Hopeful
candidate_3    13671  Hopeful
candidate_5    13623  Rejected
candidate_2        0  Rejected
candidate_4        0  Rejected

FINAL RESULT
Candidate      Votes  Status
-----------  -------  --------
candidate_1    20693  Elected
candidate_3    20670 

In [7]:
all_elect_dict

{0: {'winner': 'candidate_1',
  'rounds': 4,
  'election': <ElectionResults(4 rounds)>,
  'can1firstRoundVotes': 9129,
  'winnerfirstRoundVotes': 9129,
  'can2firstRoundVotes': 8455,
  'can3firstRoundVotes': 7911,
  'can4firstRoundVotes': 6858,
  'can1secondRoundVotes': 10902,
  'winnersecondRoundVotes': 10902,
  'can3secondRoundVotes': 10078,
  'can2secondRoundVotes': 9743,
  'can4secondRoundVotes': 0,
  'can1thirdRoundVotes': 14069,
  'winnerthirdRoundVotes': 14069,
  'can3thirdRoundVotes': 13671,
  'can2thirdRoundVotes': 0,
  'can4thirdRoundVotes': 0,
  'can1finalRoundVotes': 20693,
  'winnerfinalRoundVotes': 20693,
  'can3finalRoundVotes': 20670,
  'can2finalRoundVotes': 0,
  'can4finalRoundVotes': 0},
 1: {'winner': 'candidate_1',
  'rounds': 3,
  'election': <ElectionResults(3 rounds)>,
  'can1firstRoundVotes': 13918,
  'winnerfirstRoundVotes': 13918,
  'can3firstRoundVotes': 12693,
  'can2firstRoundVotes': 10097,
  'can4firstRoundVotes': 7022,
  'can1secondRoundVotes': 16901,
  

In [8]:
# rounds = [1,2,3,4]
# print("round {}".format(rounds[i] for i in len(rounds)))

In [9]:
# rounds = ['ROUND 1', 'ROUND 2', 'ROUND 3', 'FINAL ROUND']
# for i in range (len (rounds)):
#     print("round {}.{}".format(i + 1, rounds[i]))

In [10]:
election_results_df=pd.DataFrame()
election_results_list=[]

In [11]:
for key,val in all_elect_dict.items():
    val['election_id']=election_num_dict[key]
    print(val)

    election_results_list.append(val)

{'winner': 'candidate_1', 'rounds': 4, 'election': <ElectionResults(4 rounds)>, 'can1firstRoundVotes': 9129, 'winnerfirstRoundVotes': 9129, 'can2firstRoundVotes': 8455, 'can3firstRoundVotes': 7911, 'can4firstRoundVotes': 6858, 'can1secondRoundVotes': 10902, 'winnersecondRoundVotes': 10902, 'can3secondRoundVotes': 10078, 'can2secondRoundVotes': 9743, 'can4secondRoundVotes': 0, 'can1thirdRoundVotes': 14069, 'winnerthirdRoundVotes': 14069, 'can3thirdRoundVotes': 13671, 'can2thirdRoundVotes': 0, 'can4thirdRoundVotes': 0, 'can1finalRoundVotes': 20693, 'winnerfinalRoundVotes': 20693, 'can3finalRoundVotes': 20670, 'can2finalRoundVotes': 0, 'can4finalRoundVotes': 0, 'election_id': 'election_05-04-2020_08-40-23_5cands_16noise.csv'}
{'winner': 'candidate_1', 'rounds': 3, 'election': <ElectionResults(3 rounds)>, 'can1firstRoundVotes': 13918, 'winnerfirstRoundVotes': 13918, 'can3firstRoundVotes': 12693, 'can2firstRoundVotes': 10097, 'can4firstRoundVotes': 7022, 'can1secondRoundVotes': 16901, 'winn

In [12]:
election_results_df = pd.DataFrame(election_results_list)
election_results_df.to_csv('election_results.csv', index=False)

In [13]:
election_results_df.head()

Unnamed: 0,winner,rounds,election,can1firstRoundVotes,winnerfirstRoundVotes,can2firstRoundVotes,can3firstRoundVotes,can4firstRoundVotes,can1secondRoundVotes,winnersecondRoundVotes,...,winnerfinalRoundVotes,can3finalRoundVotes,can2finalRoundVotes,can4finalRoundVotes,election_id,can3fourthRoundVotes,winnerfourthRoundVotes,can2fourthRoundVotes,can1fourthRoundVotes,can4fourthRoundVotes
0,candidate_1,4,ROUND 1\nCandidate Votes Status\n-------...,9129,9129,8455,7911,6858.0,10902.0,10902.0,...,20693,20670,0,0.0,election_05-04-2020_08-40-23_5cands_16noise.csv,,,,,
1,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,13918,13918,10097,12693,7022.0,16901.0,16901.0,...,22940,20790,0,0.0,election_04-30-2020_21-00-03_4_0.0225.csv,,,,,
2,candidate_4,4,ROUND 1\nCandidate Votes Status\n-------...,7177,7502,6848,6384,7502.0,8735.0,9065.0,...,17358,0,0,17358.0,election_05-02-2020_08-18-17_5cands_3noise.csv,,,,,
3,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,3400,3400,2269,2053,2343.0,4475.0,4475.0,...,5761,0,0,4304.0,election_05-04-2020_08-38-34_4cands_12noise.csv,,,,,
4,candidate_2,3,ROUND 1\nCandidate Votes Status\n-------...,11210,11236,11236,10110,9061.0,14787.0,15605.0,...,25019,0,25019,0.0,election_04-30-2020_20-59-34_4_0.0375.csv,,,,,


In [14]:
# of votes per candidate in total / total votes
# of rounds in election*

# In total, how many 1st-place votes did the winner get
# In total, how many last-place votes did the 

In [15]:
election_results_df.fillna(0, inplace=True)

In [16]:
df = election_results_df 

In [17]:
df.to_csv("df.csv")

In [5]:
import pandas as pd
df = pd.read_csv('df.csv')

In [6]:
df.head()

Unnamed: 0.1,Unnamed: 0,winner,rounds,election,can1firstRoundVotes,winnerfirstRoundVotes,can2firstRoundVotes,can3firstRoundVotes,can4firstRoundVotes,can1secondRoundVotes,...,winnerfinalRoundVotes,can3finalRoundVotes,can2finalRoundVotes,can4finalRoundVotes,election_id,can3fourthRoundVotes,winnerfourthRoundVotes,can2fourthRoundVotes,can1fourthRoundVotes,can4fourthRoundVotes
0,0,candidate_1,4,ROUND 1\nCandidate Votes Status\n-------...,9129,9129,8455,7911,6858.0,10902.0,...,20693,20670,0,0.0,election_05-04-2020_08-40-23_5cands_16noise.csv,0.0,0.0,0.0,0.0,0.0
1,1,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,13918,13918,10097,12693,7022.0,16901.0,...,22940,20790,0,0.0,election_04-30-2020_21-00-03_4_0.0225.csv,0.0,0.0,0.0,0.0,0.0
2,2,candidate_4,4,ROUND 1\nCandidate Votes Status\n-------...,7177,7502,6848,6384,7502.0,8735.0,...,17358,0,0,17358.0,election_05-02-2020_08-18-17_5cands_3noise.csv,0.0,0.0,0.0,0.0,0.0
3,3,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,3400,3400,2269,2053,2343.0,4475.0,...,5761,0,0,4304.0,election_05-04-2020_08-38-34_4cands_12noise.csv,0.0,0.0,0.0,0.0,0.0
4,4,candidate_2,3,ROUND 1\nCandidate Votes Status\n-------...,11210,11236,11236,10110,9061.0,14787.0,...,25019,0,25019,0.0,election_04-30-2020_20-59-34_4_0.0375.csv,0.0,0.0,0.0,0.0,0.0


In [None]:
# df['TotalVotesWinner'] = df['firstRoundVotes']+df['secondRoundVotes']+df['thirdRoundVotes']+df['fourthRoundVotes']+df['finalRoundVotes']

In [8]:
#Getting total number of votes for each candidate in each round in each election


df['Can1Total'] = df.apply(lambda row: row['can1firstRoundVotes'] + row['can1secondRoundVotes']+ row['can1thirdRoundVotes'] + row['can1fourthRoundVotes'] + row['can1finalRoundVotes'], axis=1)

df['Can2Total'] = df.apply(lambda row: row['can2firstRoundVotes'] + row['can2secondRoundVotes']+ row['can2thirdRoundVotes'] + row['can2fourthRoundVotes'] + row['can2finalRoundVotes'], axis=1)

df['Can3Total'] = df.apply(lambda row: row['can3firstRoundVotes'] + row['can3secondRoundVotes']+ row['can3thirdRoundVotes'] + row['can3fourthRoundVotes'] + row['can3finalRoundVotes'], axis=1)

df['Can4Total'] = df.apply(lambda row: row['can4firstRoundVotes'] + row['can4secondRoundVotes']+ row['can4thirdRoundVotes'] + row['can4fourthRoundVotes'] + row['can4finalRoundVotes'], axis=1)

In [None]:
# df['Can5Total'] = df.apply(lambda row: row['can5firstRoundVotes'] + row['can5secondRoundVotes']+ row['can5thirdRoundVotes'] + row['can5fourthRoundVotes'] + row['can5finalRoundVotes'], axis=1)

In [None]:
# df.drop(columns=['firstRoundVotes','secondRoundVotes','thirdRoundVotes','finalRoundVotes','fourthRoundVotes'],inplace=True)

In [9]:
df.head()

Unnamed: 0.1,Unnamed: 0,winner,rounds,election,can1firstRoundVotes,winnerfirstRoundVotes,can2firstRoundVotes,can3firstRoundVotes,can4firstRoundVotes,can1secondRoundVotes,...,election_id,can3fourthRoundVotes,winnerfourthRoundVotes,can2fourthRoundVotes,can1fourthRoundVotes,can4fourthRoundVotes,Can1Total,Can2Total,Can3Total,Can4Total
0,0,candidate_1,4,ROUND 1\nCandidate Votes Status\n-------...,9129,9129,8455,7911,6858.0,10902.0,...,election_05-04-2020_08-40-23_5cands_16noise.csv,0.0,0.0,0.0,0.0,0.0,54793.0,18198.0,52330.0,6858.0
1,1,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,13918,13918,10097,12693,7022.0,16901.0,...,election_04-30-2020_21-00-03_4_0.0225.csv,0.0,0.0,0.0,0.0,0.0,53759.0,21247.0,49162.0,7022.0
2,2,candidate_4,4,ROUND 1\nCandidate Votes Status\n-------...,7177,7502,6848,6384,7502.0,8735.0,...,election_05-02-2020_08-18-17_5cands_3noise.csv,0.0,0.0,0.0,0.0,0.0,43095.0,26114.0,13710.0,45447.0
3,3,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,3400,3400,2269,2053,2343.0,4475.0,...,election_05-04-2020_08-38-34_4cands_12noise.csv,0.0,0.0,0.0,0.0,0.0,13636.0,4962.0,2053.0,9544.0
4,4,candidate_2,3,ROUND 1\nCandidate Votes Status\n-------...,11210,11236,11236,10110,9061.0,14787.0,...,election_04-30-2020_20-59-34_4_0.0375.csv,0.0,0.0,0.0,0.0,0.0,42595.0,51860.0,21335.0,9061.0


In [10]:
df.columns

Index(['Unnamed: 0', 'winner', 'rounds', 'election', 'can1firstRoundVotes',
       'winnerfirstRoundVotes', 'can2firstRoundVotes', 'can3firstRoundVotes',
       'can4firstRoundVotes', 'can1secondRoundVotes', 'winnersecondRoundVotes',
       'can3secondRoundVotes', 'can2secondRoundVotes', 'can4secondRoundVotes',
       'can1thirdRoundVotes', 'winnerthirdRoundVotes', 'can3thirdRoundVotes',
       'can2thirdRoundVotes', 'can4thirdRoundVotes', 'can1finalRoundVotes',
       'winnerfinalRoundVotes', 'can3finalRoundVotes', 'can2finalRoundVotes',
       'can4finalRoundVotes', 'election_id', 'can3fourthRoundVotes',
       'winnerfourthRoundVotes', 'can2fourthRoundVotes',
       'can1fourthRoundVotes', 'can4fourthRoundVotes', 'Can1Total',
       'Can2Total', 'Can3Total', 'Can4Total'],
      dtype='object')

In [11]:
df['first_rouns_list'] = df[['can1firstRoundVotes', 	'can2firstRoundVotes', 	'can3firstRoundVotes', 	'can4firstRoundVotes']].values.tolist()
df['sec_rouns_list'] = df[['can1secondRoundVotes', 	'can2secondRoundVotes', 	'can3secondRoundVotes', 'can4secondRoundVotes']].values.tolist()
df['third_rouns_list'] = df[['can1thirdRoundVotes', 	'can2thirdRoundVotes', 	'can3thirdRoundVotes', 	'can4thirdRoundVotes']].values.tolist()
df['fourth_rouns_list'] = df[['can1fourthRoundVotes', 	'can2fourthRoundVotes', 'can3fourthRoundVotes', 'can4fourthRoundVotes']].values.tolist()
df['final_rouns_list'] = df[['can1finalRoundVotes', 	'can2finalRoundVotes', 'can3finalRoundVotes', 'can4finalRoundVotes']].values.tolist()

In [12]:
df.head()

Unnamed: 0.1,Unnamed: 0,winner,rounds,election,can1firstRoundVotes,winnerfirstRoundVotes,can2firstRoundVotes,can3firstRoundVotes,can4firstRoundVotes,can1secondRoundVotes,...,can4fourthRoundVotes,Can1Total,Can2Total,Can3Total,Can4Total,first_rouns_list,sec_rouns_list,third_rouns_list,fourth_rouns_list,final_rouns_list
0,0,candidate_1,4,ROUND 1\nCandidate Votes Status\n-------...,9129,9129,8455,7911,6858.0,10902.0,...,0.0,54793.0,18198.0,52330.0,6858.0,"[9129.0, 8455.0, 7911.0, 6858.0]","[10902.0, 9743.0, 10078.0, 0.0]","[14069.0, 0.0, 13671.0, 0.0]","[0.0, 0.0, 0.0, 0.0]","[20693.0, 0.0, 20670.0, 0.0]"
1,1,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,13918,13918,10097,12693,7022.0,16901.0,...,0.0,53759.0,21247.0,49162.0,7022.0,"[13918.0, 10097.0, 12693.0, 7022.0]","[16901.0, 11150.0, 15679.0, 0.0]","[0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0]","[22940.0, 0.0, 20790.0, 0.0]"
2,2,candidate_4,4,ROUND 1\nCandidate Votes Status\n-------...,7177,7502,6848,6384,7502.0,8735.0,...,0.0,43095.0,26114.0,13710.0,45447.0,"[7177.0, 6848.0, 6384.0, 7502.0]","[8735.0, 8359.0, 7326.0, 9065.0]","[11056.0, 10907.0, 0.0, 11522.0]","[0.0, 0.0, 0.0, 0.0]","[16127.0, 0.0, 0.0, 17358.0]"
3,3,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,3400,3400,2269,2053,2343.0,4475.0,...,0.0,13636.0,4962.0,2053.0,9544.0,"[3400.0, 2269.0, 2053.0, 2343.0]","[4475.0, 2693.0, 0.0, 2897.0]","[0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0]","[5761.0, 0.0, 0.0, 4304.0]"
4,4,candidate_2,3,ROUND 1\nCandidate Votes Status\n-------...,11210,11236,11236,10110,9061.0,14787.0,...,0.0,42595.0,51860.0,21335.0,9061.0,"[11210.0, 11236.0, 10110.0, 9061.0]","[14787.0, 15605.0, 11225.0, 0.0]","[0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0]","[16598.0, 25019.0, 0.0, 0.0]"


In [14]:
import numpy as np

In [15]:
df.winnerfirstRoundVotes=df.winnerfirstRoundVotes.astype(float)
df.winnersecondRoundVotes=df.winnersecondRoundVotes.astype(float)
df.winnerthirdRoundVotes=df.winnerthirdRoundVotes.astype(float)
df.winnerfourthRoundVotes=df.winnerfourthRoundVotes.astype(float)
df.winnerfinalRoundVotes=df.winnerfinalRoundVotes.astype(float)

df['winner_1st_place_votes_r1'] = np.where(df.winnerfirstRoundVotes == df.first_rouns_list.map(lambda x: x[0]),
                                           df.winnerfirstRoundVotes, 0)

df['winner_1st_place_votes_r2'] = np.where(df.winnersecondRoundVotes == df.sec_rouns_list.map(lambda x: x[0]),
                                           df.winnersecondRoundVotes, 0)

df['winner_1st_place_votes_r3'] = np.where(df.winnerthirdRoundVotes == df.third_rouns_list.map(lambda x: x[0]),
                                           df.winnerthirdRoundVotes, 0)

df['winner_1st_place_votes_r4'] = np.where(df.winnerfourthRoundVotes == df.fourth_rouns_list.map(lambda x: x[0]),
                                           df.winnerfourthRoundVotes, 0)

# df['winner_1st_place_votes_fr'] = np.where(df.winnerfinalRoundVotes == df.final_rouns_list.map(lambda x: x[0]), #since it is obvious that the first place vote would go to the winner should we include final ?
#                                            df.winnerfinalRoundVotes, 0)

df['total_1st_place_winner_votes'] = df['winner_1st_place_votes_r1'] + df['winner_1st_place_votes_r2'] + df['winner_1st_place_votes_r3'] + df['winner_1st_place_votes_r4']

In [16]:
df['winner_last_place_votes_r1'] = np.where(df.winnerfirstRoundVotes == df.first_rouns_list.map(lambda x: x[-1]),
                                           df.winnerfirstRoundVotes, 0)

df['winner_last_place_votes_r2'] = np.where(df.winnersecondRoundVotes == df.sec_rouns_list.map(lambda x: x[-1]),
                                           df.winnersecondRoundVotes, 0)

df['winner_last_place_votes_r3'] = np.where(df.winnerthirdRoundVotes == df.third_rouns_list.map(lambda x: x[-1]),
                                           df.winnerthirdRoundVotes, 0)

df['winner_last_place_votes_r4'] = np.where(df.winnerfourthRoundVotes == df.fourth_rouns_list.map(lambda x: x[-1]),
                                           df.winnerfourthRoundVotes, 0)

# df['winner_1st_place_votes_fr'] = np.where(df.winnerfinalRoundVotes == df.final_rouns_list.map(lambda x: x[0]), #since it is obvious that the first place vote would go to the winner should we include final ?
#                                            df.winnerfinalRoundVotes, 0)

df['total_last_place_winner_votes'] = df['winner_last_place_votes_r1'] + df['winner_last_place_votes_r2'] + df['winner_last_place_votes_r3'] + df['winner_last_place_votes_r4']

In [17]:
df.head()

Unnamed: 0.1,Unnamed: 0,winner,rounds,election,can1firstRoundVotes,winnerfirstRoundVotes,can2firstRoundVotes,can3firstRoundVotes,can4firstRoundVotes,can1secondRoundVotes,...,winner_1st_place_votes_r1,winner_1st_place_votes_r2,winner_1st_place_votes_r3,winner_1st_place_votes_r4,total_1st_place_winner_votes,winner_last_place_votes_r1,winner_last_place_votes_r2,winner_last_place_votes_r3,winner_last_place_votes_r4,total_last_place_winner_votes
0,0,candidate_1,4,ROUND 1\nCandidate Votes Status\n-------...,9129,9129.0,8455,7911,6858.0,10902.0,...,9129.0,10902.0,14069.0,0.0,34100.0,0.0,0.0,0.0,0.0,0.0
1,1,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,13918,13918.0,10097,12693,7022.0,16901.0,...,13918.0,16901.0,0.0,0.0,30819.0,0.0,0.0,0.0,0.0,0.0
2,2,candidate_4,4,ROUND 1\nCandidate Votes Status\n-------...,7177,7502.0,6848,6384,7502.0,8735.0,...,0.0,0.0,0.0,0.0,0.0,7502.0,9065.0,11522.0,0.0,28089.0
3,3,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,3400,3400.0,2269,2053,2343.0,4475.0,...,3400.0,4475.0,0.0,0.0,7875.0,0.0,0.0,0.0,0.0,0.0
4,4,candidate_2,3,ROUND 1\nCandidate Votes Status\n-------...,11210,11236.0,11236,10110,9061.0,14787.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Get the max value in each list
# if value is == to winner's value for specific round
# count value as 1st place vote for winner

# for last place do this same thing


In [18]:
df_final = df[[ 'winner', 'rounds', 'election', 'Can1Total', 'Can2Total','Can3Total','Can4Total', 'total_1st_place_winner_votes','total_last_place_winner_votes']]

In [19]:
df_final.head()

Unnamed: 0,winner,rounds,election,Can1Total,Can2Total,Can3Total,Can4Total,total_1st_place_winner_votes,total_last_place_winner_votes
0,candidate_1,4,ROUND 1\nCandidate Votes Status\n-------...,54793.0,18198.0,52330.0,6858.0,34100.0,0.0
1,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,53759.0,21247.0,49162.0,7022.0,30819.0,0.0
2,candidate_4,4,ROUND 1\nCandidate Votes Status\n-------...,43095.0,26114.0,13710.0,45447.0,0.0,28089.0
3,candidate_1,3,ROUND 1\nCandidate Votes Status\n-------...,13636.0,4962.0,2053.0,9544.0,7875.0,0.0
4,candidate_2,3,ROUND 1\nCandidate Votes Status\n-------...,42595.0,51860.0,21335.0,9061.0,0.0,0.0


In [20]:
df_final.to_csv("df_final.csv")