## Import statements for DataFrame manipulation

In [None]:
import pandas as pd
import numpy as np
import math
import re
import itertools

In [None]:
# Mount Google Drive at /content/drive so that files stored there can be
# opened by path from this runtime (requires the google.colab package).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Check for spoiler effect

In [16]:
# List of ballot files (CSV base names, without the '.csv' extension)

filenames = ['Alaska_08162022_HouseofRepresentativesSpecial',
             'Burlington_03032009_Mayor', 'Minneapolis_11022021_CityCouncilWard2',
             'PierceCounty_11042008_CountyExecutiveMember']

# Columns whose name starts with "rank" (any letter case) hold ranked choices;
# every other column is dropped before the ballots are processed.
pattern = re.compile("^[rR][aA][nN][kK]")


def clean_ballot(rankings):
    """Return one ballot's rankings cleaned up and packed to the left.

    The rules are applied in a single left-to-right pass:

    * the first 'overvote' entry and everything after it is ignored;
    * '-' entries (blank / skipped rankings) are ignored;
    * a candidate that already appeared earlier on the ballot is ignored.

    The surviving entries keep their relative order and the result is padded
    on the right with '-' so it has the same length as the input.

    Args:
        rankings: sequence of hashable ranking entries for a single ballot.

    Returns:
        A new list with ``len(rankings)`` entries.
    """
    kept = []
    seen = set()
    for choice in rankings:
        if choice == 'overvote':
            # Nothing ranked at or after an overvote counts.
            break
        if choice == '-' or choice in seen:
            continue
        seen.add(choice)
        kept.append(choice)
    return kept + ['-'] * (len(rankings) - len(kept))


def clean_ballots(ballots):
    """Apply ``clean_ballot`` to every row of a 2-D table of rankings.

    Args:
        ballots: 2-D array-like (rows are ballots, columns are ranks).

    Returns:
        A new 2-D NumPy object array of the same shape. The input is never
        modified, so it does not matter whether it is writable.
    """
    ballots = np.asarray(ballots, dtype=object)
    cleaned = np.empty(ballots.shape, dtype=object)
    for row_index, row in enumerate(ballots):
        cleaned[row_index, :] = clean_ballot(row)
    return cleaned


def first_choice_column(columns):
    """Return the name of the first-choice column among ``columns``.

    An exact 'rank1' is preferred; otherwise the first name equal to 'rank1'
    ignoring letter case and surrounding whitespace is used, which keeps this
    lookup consistent with the case-insensitive ``pattern`` filter.

    Raises:
        KeyError: if no such column exists.
    """
    names = list(columns)
    if 'rank1' in names:
        return 'rank1'
    for name in names:
        if str(name).strip().lower() == 'rank1':
            return name
    raise KeyError("no 'rank1' column (case-insensitive) among " + repr(names))


def tally_first_choices(first_choices):
    """Count first-choice entries.

    Args:
        first_choices: iterable with one first-choice entry per ballot.

    Returns:
        A tuple ``(counts, total, writein)`` where ``counts`` maps each
        distinct entry to its number of ballots (in order of first
        appearance), ``total`` is the number of ballots whose first choice is
        not '-', and ``writein`` is the number of 'writein' first choices.
    """
    counts = {}
    for candidate in first_choices:
        counts[candidate] = counts.get(candidate, 0) + 1

    total = 0
    for candidate, count in counts.items():
        if candidate != '-':
            total += count

    return counts, total, counts.get('writein', 0)


# The guard is true when this cell runs in a notebook kernel or as a script,
# so the analysis below still executes there and still leaves its variables
# (data, attributes, writein, ratio, ...) at module level.
if __name__ == "__main__":
    for filename in filenames:

        print(filename)

        # Read from file
        data = pd.read_csv('/content/drive/MyDrive/RCV Voting Method Research/Voting Ballots/' + filename + '.csv')

        # Remove NA
        # NOTE(review): this runs before the non-rank columns are dropped, so a
        # ballot with a missing value in ANY column is discarded - confirm
        # that this is intended.
        data = data.dropna()

        # Identify and drop irrelevant (non-rank) attributes
        attributes = data.columns
        irrelevantAttributes = [name for name in attributes[::-1]
                                if pattern.search(name) is None]
        data = data.drop(columns=irrelevantAttributes)
        attributes = data.columns

        # Number of rank columns (kept as a module-level name for later cells)
        candNum = data.shape[1]

        # Normalise special entries to the markers used by clean_ballot().
        # NOTE(review): the overvote regex has no trailing '$', so only the
        # matched prefix is rewritten; an entry with extra trailing text will
        # not equal 'overvote' afterwards - confirm the data never has one.
        data = data.replace('^[Ww]rite.*in.*$', 'writein', regex=True)
        data = data.replace('^[Oo][Vv][Ee][Rr][Vv][Oo][Tt][Ee]', 'overvote', regex=True)
        data = data.replace(['skipped', 'Undeclared'], '-')

        # Ignore rankings after an overvote, ignore duplicated candidates and
        # shift the remaining candidates to group on the left
        data = pd.DataFrame(clean_ballots(data.to_numpy()), columns=attributes)

        # Report first-choice counts and the write-in share
        counts, total, writein = tally_first_choices(
            data[first_choice_column(attributes)])
        for candidate, count in counts.items():
            print(f"{candidate}: {count}")

        print(f"\nTotal: {total}")
        # No valid first choices -> the ratio is undefined rather than a crash.
        ratio = float(writein) / total if total else float("nan")
        print(f"Ratio: {ratio}\n")


Alaska_08162022_HouseofRepresentativesSpecial
Begich, Nick: 52798
-: 3092
Palin, Sarah: 58582
Peltola, Mary S.: 74840
writein: 2977

Total: 189197
Ratio: 0.01573492180108564

Burlington_03032009_Mayor
Bob Kiss: 2585
Andy Montroll: 2063
Kurt Wright: 2951
Dan Smith: 1306
writein: 36
James Simpson: 35
-: 8

Total: 8976
Ratio: 0.004010695187165776

Minneapolis_11022021_CityCouncilWard2
Cam Gordon: 2507
Yusra Arab: 2716
Robin Wonsley Worlobah: 2761
Guy T Gaskin: 546
Tom Anderson: 980
-: 280
writein: 9

Total: 9519
Ratio: 0.0009454774661203908

PierceCounty_11042008_CountyExecutiveMember
Shawn Bunney: 105057
Calvin Goings: 69052
Pat McCarthy: 79235
Mike Lonergan: 45330
-: 13639
writein: 458

Total: 299132
Ratio: 0.0015310966396106067

