In [1]:
from hart_tools import (
    prep_manifest,
    read_hart_cvr, 
    read_cvrs, 
    check_for_contest,
    filter_cvr_contest,
    tabulate_styles
)

from IPython.core.interactiveshell import InteractiveShell

from assertion_audit_utils import \
    Assertion, Assorter, CVR, TestNonnegMean, check_audit_parameters, find_margins,\
    find_p_values, find_sample_size, new_sample_size, summarize_status,\
    write_audit_parameters, sort_cvr_sample_num, consistent_sampling

import os
import io
import re
import numpy as np
import math
import csv
import pandas as pd
import warnings
import copy
import xml.etree.ElementTree as ET
import xml.dom.minidom
import cryptorandom
from cryptorandom.cryptorandom import SHA256, int_from_hash_py3, int_from_hash
from cryptorandom.sample import random_permutation, sample_by_index

In [2]:
# read in CVRs
cvr_list = read_cvrs(cvr_folder = "Data/hart/OC2021/oc_cvrs_for_testing_v2")
# read in manifest
manifest = pd.read_csv("Data/hart/OC2021/oc_manifest_sample.csv")
len(cvr_list)

9

In [3]:
# set values
seed = 1234567890  # use, e.g., 20 rolls of a 10-sided die. Seed doesn't have to be numeric
replacement = False

risk_function = "alpha_mart"
#because comparison audit, may want to add f parameter to bias alpha towards u
risk_fn = lambda x, m: TestNonnegMean.alpha_mart(x, eta=(m+1)/2 , N=max_cards, f=.1)
g = 0.1
max_cards = 9 + 1 # number in the sample plus 1
error_rate = 0.002

In [4]:
# contests to audit. Edit with details of your contest (eg., Contest 1 is the pres race)
contests = {'PRESIDENT AND VICE PRESIDENT':{'risk_limit':0.05,
                     'cards': 14,
                     'choice_function':'plurality',
                     'n_winners':1,
                     'candidates':['JOSEPH R. BIDEN\nKAMALA D. HARRIS',
                                   'DONALD J. TRUMP\nMICHAEL R. PENCE'],
                     'reported_winners' : ['DONALD J. TRUMP\nMICHAEL R. PENCE'],
                    }
           }

In [5]:
all_assertions = Assertion.make_all_assertions(contests)

In [6]:
contests

{'PRESIDENT AND VICE PRESIDENT': {'risk_limit': 0.05,
  'cards': 14,
  'choice_function': 'plurality',
  'n_winners': 1,
  'candidates': ['JOSEPH R. BIDEN\nKAMALA D. HARRIS',
   'DONALD J. TRUMP\nMICHAEL R. PENCE'],
  'reported_winners': ['DONALD J. TRUMP\nMICHAEL R. PENCE'],
  'assertions': {'DONALD J. TRUMP\nMICHAEL R. PENCE v JOSEPH R. BIDEN\nKAMALA D. HARRIS': <assertion_audit_utils.Assertion at 0x103c913a0>}}}

In [7]:
cvr_list, phantom_vrs = CVR.make_phantoms(max_cards, cvr_list, contests, use_style=True, prefix='phantom-1-')
print(f"Created {phantom_vrs} phantom records")
# assign random sample nums including phantoms
CVR.assign_sample_nums(cvr_list, prng=SHA256(32))

Created 9 phantom records


True

In [10]:
min_margin = find_margins(contests, cvr_list, use_style=True)
min_margin

0.0714285714285714

In [11]:
check_audit_parameters(risk_function, g, error_rate, contests)

In [12]:
# find initial sample size
rf = lambda x,m: risk_fn(x,m)[1]   # p_history is the second returned value
ss_fn = lambda m, r: TestNonnegMean.initial_sample_size(\
                        risk_function=rf, N=max_cards, margin=m, polling=False, \
                        error_rate=error_rate, alpha=r, reps=10) # change for comparison audits
sample_size = find_sample_size(contests, sample_size_function=ss_fn)  
print(sample_size)
sample_size = 6.
print(sample_size)

1.0
6.0


In [13]:
sample_indices = consistent_sampling(
    cvr_list, 
    contests = contests, 
    sample_size_dict = {'PRESIDENT AND VICE PRESIDENT' : sample_size})

In [14]:
# set mvr_list to be the same as cvr_list for now -- sample order??
mvr_list = copy.deepcopy(cvr_list)

In [15]:
manifest

Unnamed: 0,Container,Tabulator,Batch Name,Number of Ballots
0,Mail,1,1,60
1,Mail,1,2,21
2,Mail,1,3,123
3,Mail,1,4,59
4,Mail,1,5,87
...,...,...,...,...
4412,In-Person,In Person - 5,514,418
4413,In-Person,In Person - 5,515,381
4414,In-Person,In Person - 5,516,240
4415,In-Person,In Person - 5,517,403


In [16]:
#consistent_sampling returns indices starting at 1?
sampled_cvrs = [cvr_list[i-1] for i in sample_indices]
#this assumes cvrs and mvrs have already been placed in the same order....
#we might explicitly align them by an ID value
sampled_mvrs = [mvr_list[i-1] for i in sample_indices]

In [17]:
p_max = find_p_values(
    contests = contests, 
    mvr_sample = sampled_mvrs, 
    cvr_sample = sampled_cvrs, 
    use_style = True, 
    risk_function=risk_fn)
summarize_status(contests)

p-values for assertions in contest PRESIDENT AND VICE PRESIDENT
DONALD J. TRUMP
MICHAEL R. PENCE v JOSEPH R. BIDEN
KAMALA D. HARRIS 0.9942611190817792

contest PRESIDENT AND VICE PRESIDENT audit INCOMPLETE at risk limit 0.05. Attained risk 0.9942611190817792
assertions remaining to be proved:
DONALD J. TRUMP
MICHAEL R. PENCE v JOSEPH R. BIDEN
KAMALA D. HARRIS: current risk 0.9942611190817792


False

In [18]:
#need to determine how to escalate with consistent sampling
#replace sample_by_index() in new_sample_size() with consistent sampling
#keep track of incremental samples (rounds)?
#this is very, very slow
new_sample_size(
    contests = contests, 
    mvr_sample = sampled_mvrs, 
    cvr_sample = sampled_cvrs,
    use_style = True,
    risk_function = risk_fn
)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199


  m = (N*t-S)/(N-j+1) if np.isfinite(N) else t   # mean of population after (j-1)st draw, if null is true


(12.0,
 array([12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
        12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
        12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
        12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
        12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
        12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
        12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
        12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
        12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
        12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
        12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
        12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
        12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
        12., 12., 12., 12., 12., 12., 12., 1