In [132]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# eavs = pd.read_csv('/Users/will/Repos/voting-equipment/eavs/2024_EAVS_for_Public_Release_nolabel_V1.csv')
eavs = pd.read_excel('/Users/will/Repos/voting-equipment/eavs/2024_EAVS_for_Public_Release_V1_xlsx.xlsx')
codebook = pd.read_excel('/Users/will/Repos/voting-equipment/eavs/2024_EAVS_Codebook.xlsx', index_col=0)

vv = pd.read_csv('data/vv_capture_2025-07-23_14-12-58/cleaned_verifier_data.csv', converters={'FIPS code': str}).convert_dtypes()


In [None]:
# Simulation parameters:
# - vvsg2_multiplier: applies a cost bump to all equipment types to reflect VVSG 2.x
#       ACET says 20-50% more, but we've heard lower and higher estimates, so let's use 1.2.
# - bmd_cost: base cost of Ballot Marking Devices. $3500 is what Brennan used,
#       and aligns with Caulfield's findings for ExpressVote pricing.
# - hand_fed_scanner_cost: cost of precinct-count scanners like DS200.
#       $5750 replicates Caulfield's estimate for DS200:
#       "We present summary statistics on pricing trends for a number of different voting
#        machines. The most common one in our sample—ES&S’s Model DS200 optical
#        scanner—had a strikingly consistent mode and median unit price of $5,750, which
#        is consistent with previous assessments"
#       $5000 is what Brennan used
# - batch_fed_scanner_cost: cost of high-speed scanners like DS850.
#       "These scan­ners can cost anywhere between $50,000 and $100,000 each,"
#       according to https://verifiedvoting.org/voting-machines-at-risk-in-2022-a-joint-analysis-from-the-brennan-center-and-verified-voting/
# - n_hand_fed_scanners_per_precinct: number of hand-fed scanners per precinct.
# - n_bmds_per_precinct: number of BMDs per precinct.
# - n_bmds_for_all_per_precinct: number of BMDs per precinct in jurisdictions
#       that use BMDs or DREs for all voters.
# - n_voters_per_batch_scanner: number of voters required per batch-fed scanner.
#   could get some anchors and adjust this by rate of mail-in voting.

parameters = {
    "vvsg2_multiplier": 1.2,
    "bmd_cost": 3500,
    "hand_fed_scanner_cost": 5000,
    "batch_fed_scanner_cost": 75000,
    "n_hand_fed_scanners_per_precinct": 1,
    "n_bmds_per_precinct": 2,
    "n_bmds_for_all_per_precinct": 4,
    "n_voters_per_batch_scanner": 10000,
}


Format                                                       Numeric
Label                        F12c Tally Location Provisional Ballots
ValidResponses                                                  5928
ValidSkip                                                          0
RespondedDoesNotApply                                            186
RespondedDataNotAvailable                                        280
Missing                                                           67
Range                                                            0-3
Name: F12c, dtype: object

goals:
  - estimate number of hand fed scanners per polling place
  - estimate number of BMDs per polling place, whether BMD for all or not
  - estimate number of batch-fed scanners per registered voter

sanity checks:
  - are total number of registered voters in line with VV dataset?
    - VV: 222 M; EAVS: 235 M. ✔️
  - and what about precincts?
    - VV: 184 K; EAVS: 177 K. ✔️

In [43]:
# convert to int if numeric
def safe_pos_float(x):
    try:
        x = float(x)
        if x > 0:
            return x
        else:
            return 0
    except ValueError:
        return 0

In [47]:
eavs['A1a'].apply(safe_pos_float).sum() # registered voters

np.float64(234504358.0)

In [46]:
eavs['D1a'].apply(safe_pos_float).sum() # precincts

np.float64(177708.0)

In [135]:
eavs['A4k'].apply(safe_pos_float).sum() # polling places total

np.float64(2246241.0)

In [137]:
eavs['D2a'].apply(safe_pos_float).sum() # polling places 2024 general election

np.float64(95815.0)

In [None]:
eavs['D3a'].apply(safe_pos_float).sum() # polling places 2024 elecion day

np.float64(94936.0)

In [None]:
n_bmds = eavs[bmd_for_all]['F5c_1'].apply(safe_pos_float).sum() # BMDs for all voters
n_precincts = eavs[bmd_for_all]['D1a'].apply(safe_pos_float).sum()


In [139]:
df = eavs.copy()
def coerce_column(col):
    col = pd.to_numeric(col, errors='coerce')
    col.loc[col < 0] = np.nan  # set negative values to NaN
    return col

df['F5c_1'] = coerce_column(df['F5c_1'])
df['D1a'] = coerce_column(df['D1a'])
df['A1a'] = coerce_column(df['A1a'])
df['D2a'] = coerce_column(df['D2a'])
# df['F5c_1'] = pd.to_numeric(df['F5c_1'], errors='coerce')
# set negative values to nan
# df.loc[df['F5c_1']<0, 'F5c_1'] = np.nan


In [144]:
has_bmd = df['F5a'] == 'Yes'
bmd_for_all_fips = vv[vv['Election Day Marking Method'] == 'Ballot Marking Devices for all voters']['FIPS code'].apply(int).unique()

bmd_for_all = (df['F5a'] == 'Yes') & (df['FIPSCode'].isin(bmd_for_all_fips))
n_bmds = df[bmd_for_all]['F5c_1'].apply(safe_pos_float).sum() # BMDs for all voters
n_precincts = df[bmd_for_all]['D1a'].apply(safe_pos_float).sum()
n_polling_places = df[bmd_for_all]['D2a'].apply(safe_pos_float).sum() # polling places total

n_bmds / n_precincts # BMDs per precinct in jurisdictions that use BMDs for all voters

np.float64(3.5466184312527895)

In [145]:
not_bmd_for_all = (df['F5a'] == 'Yes') & (~df['FIPSCode'].isin(bmd_for_all_fips))
n_bmds = df[not_bmd_for_all]['F5c_1'].apply(safe_pos_float).sum() # BMDs for not all voters
n_precincts = df[not_bmd_for_all]['D1a'].apply(safe_pos_float).sum()
n_polling_places = df[not_bmd_for_all]['D2a'].apply(safe_pos_float).sum() # polling places total

n_bmds / n_precincts

np.float64(0.6471214244925505)

In [None]:
# need to sort models by which ones are central scanners

In [None]:
df

In [149]:
has_scanner = df['F6a'] == 'Yes'
df[has_scanner]['F6b_1'].value_counts()

F6b_1
DS200 (ES&S)                                   2154
ImageCast Precinct/ICP (Dominion)               449
Verity Scan (Hart)                              304
Other (use text box to describe)                283
AccuVote-OS (Premier)                           179
DS450 (ES&S)                                    160
ImageCast Central/ICC (Dominion)                136
Data not available                              119
ImageCast Evolution/ICE (Dominion)              107
DS300 (ES&S)                                     99
OpenElect Voting Optical Scan/OVO (Unisyn)       94
eScan (Hart)                                     91
ClearCast (Clear Ballot)                         90
ImageCast Precint2/ICP2                          68
OpenElect Freedom Vote Scan (Unisyn)             58
Chatsworth ACP (MicroVotes)                      56
ClearCount (Clear Ballot)                        39
DS850 (ES&S)                                     39
Verity Central (Hart)                            33
DS950 

In [162]:
df[has_scanner]['F6b_2'].value_counts()

F6b_2
Does not apply                                 1922
ImageCast Central/ICC (Dominion)                270
DS450 (ES&S)                                    241
DS850 (ES&S)                                    123
Data not available                              103
Valid skip                                       94
BallotNow (Hart)                                 77
Verity Central (Hart)                            45
DS950 (ES&S)                                     20
DS200 (ES&S)                                     14
OpenElect Voting Central Scan/OVCS (Unisyn)      13
ClearCount (Clear Ballot)                         9
DS300 (ES&S)                                      7
ImageCast Evolution/ICE (Dominion)                7
Verity Scan (Hart)                                6
OpenElect Freedom Vote Scan (Unisyn)              6
Other (use text box to describe)                  3
ClearCast (Clear Ballot)                          2
ExpressVote Tabulator (ES&S)                      1
M100 (

In [165]:
has_scanner = df['F6a'] == 'Yes'
df[has_scanner]['F6b_1'].value_counts() + df[has_scanner]['F6b_2'].value_counts() + df[has_scanner]['F6b_3'].value_counts()

AccuVote-OS (Premier)                             NaN
BallotNow (Hart)                                  NaN
Chatsworth ACP (MicroVotes)                       NaN
ClearCast (Clear Ballot)                          NaN
ClearCount (Clear Ballot)                        49.0
DS200 (ES&S)                                   2170.0
DS300 (ES&S)                                    107.0
DS450 (ES&S)                                    413.0
DS850 (ES&S)                                    176.0
DS950 (ES&S)                                     39.0
Data not available                              378.0
Does not apply                                    NaN
ExpressVote Tabulator (ES&S)                     13.0
ExpressVoteXL (ES&S)                              NaN
IBML (Los Angeles County)                         NaN
ImageCast Central/ICC (Dominion)                407.0
ImageCast Evolution/ICE (Dominion)                NaN
ImageCast Precinct/ICP (Dominion)                 NaN
ImageCast Precint2/ICP2     