In [401]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import humanize

In [402]:
# eavs = pd.read_csv('/Users/will/Repos/voting-equipment/eavs/2024_EAVS_for_Public_Release_nolabel_V1.csv')
eavs = pd.read_excel('/Users/will/Repos/voting-equipment/eavs/2024_EAVS_for_Public_Release_V1_xlsx.xlsx')
codebook = pd.read_excel('/Users/will/Repos/voting-equipment/eavs/2024_EAVS_Codebook.xlsx', index_col=0)

vv = pd.read_csv('data/vv_capture_2025-07-23_14-12-58/cleaned_verifier_data.csv', converters={'FIPS code': str}).convert_dtypes()
vv = vv[vv['Year'] == 2026]


  vv = pd.read_csv('data/vv_capture_2025-07-23_14-12-58/cleaned_verifier_data.csv', converters={'FIPS code': str}).convert_dtypes()


In [403]:
vv['Election Day Marking Method'].value_counts()

Election Day Marking Method
Hand marked paper ballots and BMDs                                                                 22151
Ballot Marking Devices for all voters                                                               3364
DREs with VVPAT for all voters                                                                       588
DREs without VVPAT for all voters                                                                    458
Hand marked paper ballots and DREs with VVPAT                                                        437
Hand marked paper ballots; Direct recording assistive interface without VVPAT for accessibility      385
Hand marked paper ballots and DREs without VVPAT                                                      10
Hand marked paper ballots, BMDs and DREs with VVPAT                                                    8
Hand marked paper ballots, BMDs and DREs without VVPAT                                                 8
Hand marked paper ballots, 

In [484]:
bmd_for_all = ['Ballot Marking Devices for all voters']
dre_for_all = ['DREs with VVPAT for all voters', 'DREs without VVPAT for all voters']


bmd_for_all_fips = vv[vv['Election Day Marking Method'].isin(bmd_for_all)]['FIPS code'].apply(int).unique()
eavs['bmd_for_all'] = eavs['FIPSCode'].isin(bmd_for_all_fips)

dre_for_all_fips = vv[vv['Election Day Marking Method'].isin(dre_for_all)]['FIPS code'].apply(int).unique()
eavs['dre_for_all'] = eavs['FIPSCode'].isin(dre_for_all_fips)

eavs['bmd_or_dre_for_all'] = eavs['bmd_for_all'] | eavs['dre_for_all']

goals:
  - estimate number of hand fed scanners per polling place
  - estimate number of BMDs per polling place, whether BMD for all or not
  - estimate number of batch-fed scanners per registered voter

sanity checks:
  - are total number of registered voters in line with VV dataset?
    - VV: 222 M; EAVS: 235 M. ✔️
  - and what about precincts?
    - VV: 184 K; EAVS: 178 K. ✔️
    - but why does VV use precincts and not polling places? there are 95.8 k polling places.


update: maybe just get amounts of equipment directly from EAVS


EAVS question format on equipment

F[#] Codes:

F3 → DRE without VVPAT

F4 → DRE with VVPAT

F5 → Ballot Marking Device (BMD)

F6 → Scanner
```
F[#] — Device Category
├── a           → Is this device type used? (Yes/No)
├── b_1         → Model of first device (if any)
│   └── c_1     → Quantity of that model
├── b_2         → Model of second device (if any)
│   └── c_2     → Quantity of that model
└── b_3         → Model of third device (if any)
    └── c_3     → Quantity of that model
```

In [485]:
# coerce non-equipment columns

# convert to int if numeric
def safe_pos_float(x):
    try:
        x = float(x)
        if x > 0:
            return x
        else:
            return 0
    except ValueError:
        return 0
    
def coerce_column(col):
    col = pd.to_numeric(col, errors='coerce')
    col.loc[col < 0] = np.nan  # set negative values to NaN
    return col

numeric_columns = ['D1a', 'A1a', 'D2a', 'F1a', 'F1b', 'F1d', 'F1e', 'F1f', 'F1g']
for col in numeric_columns:
    eavs[col] = coerce_column(eavs[col])

# Coerce equipment count columns
for equipment in ['3', '4', '5', '6']:
    for i in range(1, 4):
        col = f'F{equipment}c_{i}'
        if col in eavs.columns:
            eavs[col] = coerce_column(eavs[col])



In [486]:
print(f'{humanize.intword(eavs["A1a"].apply(safe_pos_float).sum())} registered voters')
print(f'{humanize.intword(eavs["F1a"].apply(safe_pos_float).sum())} ballots cast')
print(f'{humanize.intword(eavs["D1a"].apply(safe_pos_float).sum())} precincts')
print(f'{humanize.intword(eavs["A4k"].apply(safe_pos_float).sum())} polling places total')
print(f'{humanize.intword(eavs["D2a"].apply(safe_pos_float).sum())} polling places 2024 general election')
print(f'{humanize.intword(eavs["D3a"].apply(safe_pos_float).sum())} polling places 2024 election day')

234.8 million registered voters
158.2 million ballots cast
177.7 thousand precincts
2.2 million polling places total
95.8 thousand polling places 2024 general election
94.9 thousand polling places 2024 election day


In [487]:
# indicate whether scanners are hand-fed or batch-fed

scanners = {
    'AccuVote-OS (Premier)': 'hand-fed',
    'BallotNow (Hart)': 'batch-fed',
    'Chatsworth ACP (MicroVotes)': 'hand-fed',
    'ClearCast (Clear Ballot)': 'hand-fed',
    'ClearCount (Clear Ballot)': 'batch-fed',
    'DS200 (ES&S)': 'hand-fed',
    'DS300 (ES&S)': 'hand-fed',
    'DS450 (ES&S)': 'batch-fed',
    'DS850 (ES&S)': 'batch-fed',
    'DS950 (ES&S)': 'batch-fed',
    'ExpressVote Tabulator (ES&S)': 'hand-fed',
    'ExpressVoteXL (ES&S)': 'hand-fed',
    'IBML (Los Angeles County)': 'batch-fed',
    'ImageCast Central/ICC (Dominion)': 'batch-fed',
    'ImageCast Evolution/ICE (Dominion)': 'hand-fed',
    'ImageCast Precinct/ICP (Dominion)': 'hand-fed',
    'ImageCast Precint2/ICP2': 'hand-fed',
    'M100 (ES&S)': 'hand-fed',
    'M650 (ES&S)': 'batch-fed',
    'OpenElect Freedom Vote Scan (Unisyn)': 'hand-fed',
    'OpenElect Voting Central Scan/OVCS (Unisyn)': 'batch-fed',
    'OpenElect Voting Optical Scan/OVO (Unisyn)': 'hand-fed',
    'Verity Central (Hart)': 'batch-fed',
    'Verity Scan (Hart)': 'hand-fed',
    'eScan (Hart)': 'hand-fed'
}


In [488]:
turnout = eavs['F1a'].sum() / eavs['A1a'].sum()
turnout

# impute registered voters for North Dakota
nd = eavs['State_Full'] == 'NORTH DAKOTA'
eavs.loc[nd, 'A1a'] = round(eavs.loc[nd, 'F1a'] * turnout)

In [489]:
# count  devices for each type of equipment

def count_devices(row, prefix):
    total = 0
    has_data = False

    for i in range(1, 4):
        model = row.get(f'{prefix}b_{i}')
        count = row.get(f'{prefix}c_{i}')
        if pd.isna(model) or pd.isna(count):
            continue
        has_data = True
        total += count

    if not has_data:
        return np.nan
    return total

equipment_codes = {'dre_wo_vvpat': 'F3',
                   'dre_w_vvpat': 'F4',
                   'bmd': 'F5',
                   'scanner': 'F6'}

for equipment, prefix in equipment_codes.items():
    eavs[f'{equipment}_total'] = eavs.apply(lambda row: count_devices(row, prefix), axis=1)

# count hand-fed and batch-fed totals for a row
def count_scanners(row):
    hand_total = 0
    batch_total = 0
    has_hand = False
    has_batch = False
    has_data = False

    for i in range(1, 4):
        model = row.get(f'F6b_{i}')
        count = row.get(f'F6c_{i}')
        if pd.isna(model) or pd.isna(count):
            continue
        has_data = True

        feed_type = scanners.get(model, 'unknown')
        if feed_type == 'unknown':
            return pd.Series({'hand_fed_total': np.nan, 'batch_fed_total': np.nan})
        elif feed_type == 'hand-fed':
            hand_total += count
            has_hand = True
        elif feed_type == 'batch-fed':
            batch_total += count
            has_batch = True

    if not has_data:
        return pd.Series({'hand_fed_total': np.nan, 'batch_fed_total': np.nan})

    return pd.Series({
        'hand_fed_total': hand_total if has_hand else np.nan,
        'batch_fed_total': batch_total if has_batch else np.nan
    })

# Apply function row-wise
eavs[['hand_fed_total', 'batch_fed_total']] = eavs.apply(count_scanners, axis=1)


In [490]:
def incomplete_equipment_data(row):
    incomplete = False
    # identifies rows where equipment is marked as 'Yes' but no count is provided

    for equipment in ['3', '4', '5', '6']:
        if row[f'F{equipment}a'] == 'Yes':
            # only check the first model for each equipment type
            if pd.isna(row[f'F{equipment}c_1']): # to require model, also use pd.isna(row[f'F{equipment}b_1'])
                incomplete = True

    return incomplete

eavs['incomplete_equipment'] = eavs.apply(incomplete_equipment_data, axis=1)

equipment_counts = [key + '_total' for key in equipment_codes.keys()][:-1] + ['hand_fed_total', 'batch_fed_total']
eavs['total_equipment_counts'] = eavs[equipment_counts].sum(axis=1)

eavs['equipment_per_voter'] = eavs['total_equipment_counts'] / eavs['A1a']

In [491]:
eavs['use_for_analysis'] = (eavs['total_equipment_counts'] > 0) & ~eavs['incomplete_equipment'] & ~eavs['dre_for_all']
eq = eavs[eavs['use_for_analysis']].copy()

# filter the data only to those with complete equipment data and non-zero registered voters

In [492]:
eq.groupby('State_Full')[equipment_counts + ['A1a', 'total_equipment_counts']].sum().sort_values('total_equipment_counts', ascending=False)

Unnamed: 0_level_0,dre_wo_vvpat_total,dre_w_vvpat_total,bmd_total,hand_fed_total,batch_fed_total,A1a,total_equipment_counts
State_Full,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
TEXAS,942.0,0.0,45410.0,8173.0,82.0,17026306.0,54607.0
PENNSYLVANIA,0.0,0.0,13044.0,10932.0,103.0,7327070.0,24079.0
CALIFORNIA,0.0,0.0,21992.0,1207.0,281.0,24988677.0,23480.0
GEORGIA,0.0,0.0,19441.0,2601.0,172.0,5678068.0,22214.0
SOUTH CAROLINA,0.0,0.0,15659.0,3124.0,22.0,3851187.0,18805.0
ILLINOIS,0.0,46.0,10825.0,7643.0,4.0,8854280.0,18518.0
OHIO,0.0,39.0,13013.0,4629.0,140.0,6655092.0,17821.0
NEW YORK,0.0,0.0,5382.0,10793.0,0.0,13579416.0,16175.0
FLORIDA,0.0,0.0,5177.0,8572.0,133.0,15740083.0,13882.0
NEW JERSEY,0.0,0.0,10994.0,0.0,64.0,6580540.0,11058.0


In [494]:
grouped = eq.groupby('bmd_for_all')[equipment_counts + ['scanner_total'] + ['A1a']].sum()
per_voter_rates = grouped[equipment_counts].div(grouped['A1a'], axis=0)
per_voter_rates


Unnamed: 0_level_0,dre_wo_vvpat_total,dre_w_vvpat_total,bmd_total,hand_fed_total,batch_fed_total
bmd_for_all,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
False,3e-06,3e-06,0.000492,0.000525,1.2e-05
True,1.2e-05,3.6e-05,0.002672,0.000482,9e-06


## extrapolate findings to the rows that were not used for analysis

In [495]:
eavs = eavs.copy()

# Apply idealized estimates only to rows not used for analysis
mask = ~eavs['use_for_analysis']

# Estimate based on whether the jurisdiction uses BMDs for all voters
for bmd_flag in [True, False]:
    rate_row = per_voter_rates.loc[bmd_flag]
    submask = mask & (eavs['bmd_or_dre_for_all'] == bmd_flag)
    for equipment in equipment_counts:
        eavs.loc[submask, f'{equipment}_ideal'] = eavs.loc[submask, 'A1a'] * rate_row[equipment]

# # For rows used in analysis, copy actuals into the _ideal columns
for equipment in equipment_counts:
    eavs.loc[eavs['use_for_analysis'], f'{equipment}_ideal'] = eavs.loc[eavs['use_for_analysis'], equipment]

eavs['bmd_total_ideal'] = (
    eavs.get('bmd_total_ideal', 0).fillna(0) +
    eavs.get('dre_wo_vvpat_total_ideal', 0).fillna(0) +
    eavs.get('dre_w_vvpat_total_ideal', 0).fillna(0)
)


In [496]:
cbo = pd.read_excel('/Users/will/Repos/voting-equipment/cbo_projections.xlsx',
                    sheet_name='1. Econ Vars_Annual Rates',
                    header=7, index_col=0).T

inflation_rates = cbo['Growth of the CPI-U'] / 100

def inflation_multiplier(inflation_series, start_year, end_year):
    """Calculate compounded inflation multiplier from start_year to end_year (inclusive)."""
    relevant_years = inflation_series.loc[start_year:end_year]
    multiplier = (1 + relevant_years).prod()
    return multiplier


In [566]:
bmd_cost

np.float64(5575.638324557481)

In [568]:
hand_fed_scanner_cost

np.float64(9159.97724748729)

In [569]:
batch_fed_scanner_cost

np.float64(119477.9640976603)

In [571]:
cost_multiplier

np.float64(1.5930395213021373)

In [497]:
vvsg_multiplier = 1.2  # VVSG 2.0 multiplier

replacement_year = 2028
start_year = 2021

inflation = inflation_multiplier(inflation_rates, start_year, replacement_year)

cost_multiplier = vvsg_multiplier * inflation

bmd_cost = 3500 * cost_multiplier
hand_fed_scanner_cost = 5750 * cost_multiplier
batch_fed_scanner_cost = 75000 * cost_multiplier

bmd_count = eavs['bmd_total_ideal'].sum()
hand_fed_scanner_count = eavs['hand_fed_total_ideal'].sum()
batch_fed_scanner_count = eavs['batch_fed_total_ideal'].sum()

bmd_total_cost = bmd_count * bmd_cost
hand_fed_scanner_total_cost = hand_fed_scanner_count * hand_fed_scanner_cost
batch_fed_scanner_total_cost = batch_fed_scanner_count * batch_fed_scanner_cost

print(f"Replacing {humanize.intword(bmd_count)} BMDs will cost ${humanize.intword(bmd_total_cost)}")
print(f"Replacing {humanize.intword(hand_fed_scanner_count)} Hand-Fed Scanners will cost ${humanize.intword(hand_fed_scanner_total_cost)}")
print(f"Replacing {humanize.intword(batch_fed_scanner_count)} Batch-Fed Scanners will cost ${humanize.intword(batch_fed_scanner_total_cost)}")
print(f"The total cost of replacing all equipment is ${humanize.intword(bmd_total_cost + hand_fed_scanner_total_cost + batch_fed_scanner_total_cost)}")

Replacing 274.4 thousand BMDs will cost $1.5 billion
Replacing 120.3 thousand Hand-Fed Scanners will cost $1.1 billion
Replacing 2.6 thousand Batch-Fed Scanners will cost $306.0 million
The total cost of replacing all equipment is $2.9 billion


In [541]:
eavs['bmd_or_dre_for_all'].value_counts()

bmd_or_dre_for_all
False    5646
True      815
Name: count, dtype: int64

In [565]:

# Build table rows
rows = []

for label, group in eavs.groupby('bmd_or_dre_for_all'):
    jurisdictions = len(group)
    bmd_count = group['bmd_total_ideal'].sum()
    hand_fed_count = group['hand_fed_total_ideal'].sum()
    batch_fed_count = group['batch_fed_total_ideal'].sum()

    bmd_total = bmd_count * bmd_cost
    hand_fed_total = hand_fed_count * hand_fed_scanner_cost
    batch_fed_total = batch_fed_count * batch_fed_scanner_cost
    total_cost = bmd_total + hand_fed_total + batch_fed_total

    rows.append({
        'bmd_or_dre_for_all': label,
        '# Jurisdictions': jurisdictions,
        '# BMDs': int(bmd_count),
        'BMD Total Cost': bmd_total,
        '# Hand-Fed Scanners': int(hand_fed_count),
        'Hand-Fed Scanner Total Cost': hand_fed_total,
        '# Batch-Fed Scanners': int(batch_fed_count),
        'Batch-Fed Scanner Total Cost': batch_fed_total,
        'Total Cost': total_cost,
    })

# Create DataFrame
cost_table = pd.DataFrame(rows)

# Add total row
totals = {
    'bmd_or_dre_for_all': 'Total',
    '# Jurisdictions': cost_table['# Jurisdictions'].sum(),
    '# BMDs': cost_table['# BMDs'].sum(),
    'BMD Total Cost': cost_table['BMD Total Cost'].sum(),
    '# Hand-Fed Scanners': cost_table['# Hand-Fed Scanners'].sum(),
    'Hand-Fed Scanner Total Cost': cost_table['Hand-Fed Scanner Total Cost'].sum(),
    '# Batch-Fed Scanners': cost_table['# Batch-Fed Scanners'].sum(),
    'Batch-Fed Scanner Total Cost': cost_table['Batch-Fed Scanner Total Cost'].sum(),
    'Total Cost': cost_table['Total Cost'].sum(),
}
cost_table = pd.concat([cost_table, pd.DataFrame([totals])], ignore_index=True)

# Custom formatter: shows M or B with 1 decimal when needed
def number_formatter(x, pos=None):
    units = [(1e12, 'T'), (1e9, 'B'), (1e6, 'M'), (1e3, 'K')]

    for factor, suffix in units:
        if x >= factor:
            return f'{x / factor:.3g} {suffix}'.rstrip('0').rstrip('.')

    return f'{x:.0f}'

# Format cost columns
for col in ['BMD Total Cost', 'Hand-Fed Scanner Total Cost', 'Batch-Fed Scanner Total Cost', 'Total Cost']:
    # cost_table[col] = cost_table[col].map('${:,.0f}'.format)
    # cost_table[col] = cost_table[col].apply(lamda x: "humanize.intword)
    cost_table[col] = cost_table[col].apply(lambda x: f'${number_formatter(x)}')

# Format number columns
for col in ['# Jurisdictions', '# BMDs', '# Hand-Fed Scanners', '# Batch-Fed Scanners']:
    cost_table[col] = cost_table[col].apply(number_formatter)

cost_table

cost_table = cost_table.set_index('bmd_or_dre_for_all')
cost_table.index = cost_table.index.map({
    True: 'BMD/DRE-for-all jurisdictions',
    False: 'HMPB+BMD jurisdictions',
    'Total': 'Total'
})
cost_table.index.name = None  # Remove index title

# Reorder rows
cost_table = cost_table.reindex([
    'BMD/DRE-for-all jurisdictions',
    'HMPB+BMD jurisdictions',
    'Total'
])

cost_table.index.name = None  # Remove index title

cost_table

Unnamed: 0,# Jurisdictions,# BMDs,BMD Total Cost,# Hand-Fed Scanners,Hand-Fed Scanner Total Cost,# Batch-Fed Scanners,Batch-Fed Scanner Total Cost,Total Cost
BMD/DRE-for-all jurisdictions,815,193 K,$1.08 B,34.2 K,$313 M,619,$74 M,$1.46 B
HMPB+BMD jurisdictions,5.65 K,81.6 K,$455 M,86.1 K,$788 M,1.94 K,$232 M,$1.48 B
Total,6.46 K,274 K,$1.53 B,120 K,$1.1 B,2.56 K,$306 M,$2.94 B


In [511]:
la = eavs['State_Full'] == 'LOUISIANA'
eavs[la][['bmd_total', 'bmd_total_ideal', 'hand_fed_total', 'hand_fed_total_ideal']].sum()

bmd_total                  0.000000
bmd_total_ideal         8286.664168
hand_fed_total             0.000000
hand_fed_total_ideal    1469.280441
dtype: float64

In [519]:
eavs[la]['bmd_total_ideal'].sum() * bmd_cost / 1e6

np.float64(46.203442318391254)

In [518]:
eavs[la]['hand_fed_total_ideal'].sum() * hand_fed_scanner_cost / 1e6

np.float64(13.458575411097915)

In [527]:
ok = eavs['State_Full'] == 'OKLAHOMA'
eavs[ok]['hand_fed_total_ideal'].sum() * hand_fed_scanner_cost / 1e6
eavs[ok]['bmd_total_ideal'].sum() * bmd_cost / 1e6

eavs[ok][['dre_w_vvpat_total', 'dre_wo_vvpat_total', 'bmd_total', 'bmd_total_ideal', 'hand_fed_total', 'hand_fed_total_ideal']].sum()

dre_w_vvpat_total          0.0
dre_wo_vvpat_total         0.0
bmd_total                  0.0
bmd_total_ideal            0.0
hand_fed_total          2067.0
hand_fed_total_ideal    2067.0
dtype: float64

In [528]:
tx = eavs['State_Full'] == 'TEXAS'
eavs[tx]['hand_fed_total_ideal'].sum() * hand_fed_scanner_cost / 1e6
eavs[tx]['bmd_total_ideal'].sum() * bmd_cost / 1e6

eavs[tx][['dre_w_vvpat_total', 'dre_wo_vvpat_total', 'bmd_total', 'bmd_total_ideal', 'hand_fed_total', 'hand_fed_total_ideal']].sum()

dre_w_vvpat_total           0.000000
dre_wo_vvpat_total       1030.000000
bmd_total               46027.000000
bmd_total_ideal         49122.319212
hand_fed_total           8379.000000
hand_fed_total_ideal     8973.985984
dtype: float64

In [534]:
eavs[tx]['bmd_total_ideal'].sum() * bmd_cost / 1e6 + eavs[tx]['hand_fed_total_ideal'].sum() * hand_fed_scanner_cost / 1e6 + eavs[tx]['batch_fed_total_ideal'].sum() * batch_fed_scanner_cost / 1e6

np.float64(367.81890128419025)

In [533]:
sc = eavs['State_Full'] == 'SOUTH CAROLINA'
eavs[sc]['bmd_total_ideal'].sum() * bmd_cost / 1e6 + eavs[sc]['hand_fed_total_ideal'].sum() * hand_fed_scanner_cost / 1e6

np.float64(115.92468944539588)

In [536]:
ny = eavs['State_Full'] == 'NEW YORK'
eavs[ny]['bmd_total_ideal'].sum() * bmd_cost / 1e6 + eavs[ny]['hand_fed_total_ideal'].sum() * hand_fed_scanner_cost / 1e6 + eavs[ny]['batch_fed_total_ideal'].sum() * batch_fed_scanner_cost / 1e6

np.float64(128.8717198948987)

In [538]:
ny = eavs['State_Full'] == 'NEW YORK'
eavs[ny]['hand_fed_total_ideal'].sum() * hand_fed_scanner_cost / 1e6
eavs[ny]['bmd_total_ideal'].sum() * bmd_cost / 1e6

eavs[ny][['dre_w_vvpat_total', 'dre_wo_vvpat_total', 'bmd_total', 'bmd_total_ideal', 'hand_fed_total', 'hand_fed_total_ideal', 'batch_fed_total', 'batch_fed_total_ideal']].sum()

dre_w_vvpat_total            0.0
dre_wo_vvpat_total           0.0
bmd_total                 5382.0
bmd_total_ideal           5382.0
hand_fed_total           10793.0
hand_fed_total_ideal     10793.0
batch_fed_total              0.0
batch_fed_total_ideal        0.0
dtype: float64

## Analysis (flawed) of central vs precinct count

can't really use this.

In [11]:
eavs['batch_fed_total'].sum()

np.float64(2126.0)

In [12]:
has_scanner = eavs['F6a'] == 'Yes'

eavs[has_scanner]['F6b_1'].value_counts() + eavs[has_scanner]['F6b_2'].value_counts() + eavs[has_scanner]['F6b_3'].value_counts()

AccuVote-OS (Premier)                             NaN
BallotNow (Hart)                                  NaN
Chatsworth ACP (MicroVotes)                       NaN
ClearCast (Clear Ballot)                          NaN
ClearCount (Clear Ballot)                        49.0
DS200 (ES&S)                                   2170.0
DS300 (ES&S)                                    107.0
DS450 (ES&S)                                    413.0
DS850 (ES&S)                                    176.0
DS950 (ES&S)                                     39.0
Data not available                              378.0
Does not apply                                    NaN
ExpressVote Tabulator (ES&S)                     13.0
ExpressVoteXL (ES&S)                              NaN
IBML (Los Angeles County)                         NaN
ImageCast Central/ICC (Dominion)                407.0
ImageCast Evolution/ICE (Dominion)                NaN
ImageCast Precinct/ICP (Dominion)                 NaN
ImageCast Precint2/ICP2     

do batch-fed scanners as a function of number of mail-in ballots?

In [13]:
pd.to_numeric(eavs['C8a'], errors='coerce').sum()

np.float64(47629437.0)

In [None]:
eavs['ballots_counted_centrally'] = (eavs.loc[eavs['F12a'] == 'Central location', 'F1b'] + # ballots cast in person on election day
    eavs.loc[eavs['F12c'] == 'Central location', 'F1e'] + # provisional ballots
    eavs.loc[eavs['F12d'] == 'Central location', 'F1f'] + # ballots cast early in person
    eavs.loc[eavs['F12e'] == 'Central location', 'F1d'] + # mail ballots, jurisdictions that do not send mail ballots to all voters
    eavs.loc[eavs['F12e'] == 'Central location', 'F1g']) #  mail ballots, jurisdictions that send mail ballots to all voters

humanize.intword(eavs['ballots_counted_centrally'].sum())


# F1b/e/f/g (ballots counted) * F12a/c/d/e

'60.8 million'

In [None]:
# % of total ballots (F1a) counted centrally. this is way off because jurisdictions are incorrectly reporting a central scan
(eavs.groupby('State_Full')['ballots_counted_centrally'].sum().sort_values(ascending=False) / 
  eavs.groupby('State_Full')['F1a'].sum().sort_values(ascending=False)).sort_values(ascending=False)*100

State_Full
NORTHERN MARIANA ISLANDS    100.000000
HAWAII                      100.000000
GUAM                         99.838193
LOUISIANA                    99.795705
WEST VIRGINIA                99.779773
OKLAHOMA                     99.732087
DELAWARE                     99.673191
ILLINOIS                     99.558609
WYOMING                      99.512767
NEW YORK                     99.393537
MARYLAND                     99.381705
NEW MEXICO                   99.377642
NEVADA                       99.248939
COLORADO                     99.005355
NEBRASKA                     97.631651
CALIFORNIA                   83.172522
SOUTH DAKOTA                 82.570530
UTAH                         74.920785
TEXAS                        70.363092
KENTUCKY                     49.051815
MISSOURI                     45.564064
OHIO                         44.184359
ARIZONA                      35.581653
ARKANSAS                     22.799520
FLORIDA                      18.397282
INDIANA       