In [45]:
import random
from src.utils import noise_bal, project_FPV
from src.intervals import Wald_CI

CVR = [(1,2)]*700 + [(2,)]*770 + [(3,)]*850 + [(4,1)]*350 + [(5,1)]*300 + [(6,)]*100 
noise_level = 0.05
noised_indices = random.sample(range(len(CVR)), k=int(noise_level*len(CVR)))
BAL = [bal for bal in CVR]
for idx in noised_indices:
    BAL[idx] = noise_bal(BAL[idx])

# shuffle BAL and CVR in the same way
combined = list(zip(BAL, CVR))
random.shuffle(combined)
shuffled_BAL, shuffled_CVR = zip(*combined)

To audit steps that come after the election of a candidate, we have to audit "degree 1 tallies" -- part of the votes in the pile have transfered through the winning candidate.

$ T_j = \left(T_{j, CVR} +\sum_{i}\pi_j(BAL_i, CVR_i)\right)+(1-k)\left(T_{wj, CVR} +\sum_{i}\pi_{wj}(BAL_i, CVR_i)\right)$

We have to build confidence intervals for three variables in this expression: $\mu_0 := \mathbb{E}(\pi_j(i))$, $\mu_1:=\mathbb{E}(\pi_{wj}(i))$, and $k$. Because we are combining all three of these confidence intervals into one, we have to budget the size $\alpha$ of each confidence interval: we must pick $\alpha_0$, $\alpha_1$, and $\alpha_k$ such that

$0.05 = \alpha = \alpha_0 + \alpha_1 + \alpha_k$ (this is what Bonferroni demands).

We can (should) be strategic about this: in practice the degree-0 effect (i.e. $\mu_0$) contributes a lot more to $T_j$ than the rest of the sum (most ballots a candidate receives did not transfer through a winner). So we should give more allowance to $\alpha_0$.

This gets a little worse: $k$ itself is built by combining two confidence intervals, so we will have to budget $\alpha_k$ to decide how tight to make *those* intervals.

$k = \displaystyle\frac{n- g - t - (m+1)10^{-6}}{(m+1)T -t} = \frac{c - U}{V}$ where $U = g+t$ and $V=(m+1)T -t$ are variables we must build CIs for.

In [46]:
alpha_0 = 0.03
alpha_1 = 0.01
alpha_k = 0.01
alpha_U = 0.005
alpha_V = 0.005

sample_size = 2*len(BAL)//10

In [47]:
t_sample = []
g_sample = []
T_sample = []

hopeful = {1,2,3,4,6} # in step 3a, 5 is eliminated
hopeful_without1 = {2,3,4,6} 

for i in range(0, sample_size):
    bal, cvr = shuffled_BAL[i], shuffled_CVR[i]
    fpv_bal, fpv_cvr = project_FPV(bal, hopeful), project_FPV(cvr, hopeful)
    spv_bal, spv_cvr = project_FPV(bal, hopeful_without1), project_FPV(cvr, hopeful_without1) # note this is only the spv when fpv is 1
    if fpv_bal == fpv_cvr:
        g_sample.append(0.0)
        T_sample.append(0.0)
        if fpv_bal != 1:
            t_sample.append(0.0)
            continue
        else:
            if spv_bal == spv_cvr:
                t_sample.append(0.0)
                continue
            else: 
                if spv_cvr == 6: # this was previously a t vote, now it is not
                    t_sample.append(-1.0)
                elif spv_bal == 6: # this was previously not a t vote, now it is
                    t_sample.append(1.0)
                else:
                    t_sample.append(0.0)
            continue
    else: #fpv disagreement
        if fpv_cvr == 1: # this was previously a T vote, now it is not
            T_sample.append(-1.0)
            if spv_cvr == 6: # this was previously a t vote, now it is not
                t_sample.append(-1.0)
            else:
                t_sample.append(0.0)
        elif fpv_bal == 1: # this was previously not a T vote, now it is
            T_sample.append(1.0)
            if spv_bal == 6: # this was previously not a t vote, now it is
                t_sample.append(1.0)
            else:
                t_sample.append(0.0)
        else: # this was previously not a FPV 1 vote, and it still isn't
            T_sample.append(0.0)
            t_sample.append(0.0)
        if fpv_cvr == 6: # this was previously a g vote, now it is not
            g_sample.append(-1.0)
        elif fpv_bal == 6: # this was previously not a g vote, now it is
            g_sample.append(1.0)
        else: 
            g_sample.append(0.0)

m=2
n=3070
gt_sample = [g_sample[i] + t_sample[i] for i in range(len(g_sample))]
Tt_sample = [(m+1)*T_sample[i] - t_sample[i] for i in range(len(t_sample))]
gt_ci_lo, gt_ci_hi = Wald_CI(gt_sample, N=len(BAL), alpha = 0.01)
print(f"gt_ci bounds before scaling: ({gt_ci_lo:.4f}, {gt_ci_hi:.4f})")
gt_ci_lo *= len(BAL)
gt_ci_hi *= len(BAL)
gt_ci_lo += 100
gt_ci_hi += 100
print(f"(g+t) CI: ({gt_ci_lo:.4f}, {gt_ci_hi:.4f})")
Tt_ci_lo, Tt_ci_hi = Wald_CI(Tt_sample, N=len(BAL), alpha = alpha_V, bounds=(-4.0, 4.0))
Tt_ci_lo *= len(BAL)
Tt_ci_hi *= len(BAL)
Tt_ci_lo += (m+1)*1000 - 0
Tt_ci_hi += (m+1)*1000 - 0
print(f"(m+1)T - t CI: ({Tt_ci_lo:.4f}, {Tt_ci_hi:.4f})")
c = n + (m+1)*10**(-6)
num_hi = c - gt_ci_lo
num_lo = c - gt_ci_hi
k_hi=num_hi/Tt_ci_lo
k_lo=num_lo/Tt_ci_hi
print(f"k CI: ({k_lo:.6f}, {k_hi:.6f})")

round3a_deg0_samples = {i: [] for i in range(2, 7)} 
round3a_deg1_samples = {i: [] for i in range(2, 7)} 

for i in range(0, sample_size):
    bal, cvr = shuffled_BAL[i], shuffled_CVR[i]
    fpv_bal, fpv_cvr = project_FPV(bal, hopeful), project_FPV(cvr, hopeful)
    spv_bal, spv_cvr = project_FPV(bal, hopeful_without1), project_FPV(cvr, hopeful_without1)
    if fpv_bal == fpv_cvr:
        if fpv_bal != 1 or spv_bal == spv_cvr:
            for i in range(2,7):
                round3a_deg0_samples[i].append(0.0)
                round3a_deg1_samples[i].append(0.0)
        else: # both fpv are 1 and spv differ
            # in either case the degree 0 impact null
            for i in range(2,7):
                round3a_deg0_samples[i].append(0.0)
                # the deg 1 impact on spv_cvr is -1, on spv_bal is +1, rest are 0
                if i == spv_cvr:
                    round3a_deg1_samples[i].append(-1.0)
                elif i == spv_bal:
                    round3a_deg1_samples[i].append(1.0)
                else:
                    round3a_deg1_samples[i].append(0.0)
    else: # different fpv 
        if fpv_cvr == 1: # previously this was a deg 1 vote, now it is not
            round3a_deg1_samples[spv_cvr].append(-1.0)
            round3a_deg0_samples[fpv_bal].append(1.0)
            # rest are 0
            for i in range(2,7):
                if i != spv_cvr:
                    round3a_deg1_samples[i].append(0.0)
                if i != fpv_bal:
                    round3a_deg0_samples[i].append(0.0)
        elif fpv_bal == 1: # previously this was not a deg 1 vote, now it is
            round3a_deg1_samples[spv_bal].append(1.0)
            round3a_deg0_samples[fpv_cvr].append(-1.0)
            # rest are 0
            for i in range(2,7):
                if i != spv_bal:
                    round3a_deg1_samples[i].append(0.0)
                if i != fpv_cvr:
                    round3a_deg0_samples[i].append(0.0)
        else: # only degree 0 impact
            round3a_deg0_samples[fpv_bal].append(1.0)
            round3a_deg0_samples[fpv_cvr].append(-1.0)
            for i in range(2,7):
                round3a_deg1_samples[i].append(0.0)
                if i != fpv_bal and i != fpv_cvr:
                    round3a_deg0_samples[i].append(0.0)

# phew!
case3a_CVR_FPV = {i: sum(1 for bal in CVR if project_FPV(bal, hopeful) == i) for i in range(1,7)}

conf_intervals_3a = {}
for i in [2,3,4,6]:
    deg0_ci_lo, deg0_ci_hi = Wald_CI(round3a_deg0_samples[i], N=len(BAL), bounds=(-1.0, 1.0))
    deg0_ci_lo *= len(BAL)
    deg0_ci_hi *= len(BAL)
    deg0_ci_lo += case3a_CVR_FPV[i]
    deg0_ci_hi += case3a_CVR_FPV[i]
    deg1_ci_lo, deg1_ci_hi = Wald_CI(round3a_deg1_samples[i], N=len(BAL), bounds=(0, 1.0)) # in this example, the deg1 effects are non-negative
    deg1_ci_lo *= len(BAL)
    deg1_ci_hi *= len(BAL)
    deg1_ci_lo += 0
    deg1_ci_hi += 0
    total_ci_lo = deg0_ci_lo + (1-k_hi)*deg1_ci_lo
    total_ci_hi = deg0_ci_hi + (1-k_lo)*deg1_ci_hi
    if i != 6: print(f"Round 3a, Candidate {i}, Total: CI=({total_ci_lo:.4f}, {total_ci_hi:.4f})")
    conf_intervals_3a[i] = (total_ci_lo, total_ci_hi)
    if i == 6:
        q_lo = (len(BAL)- total_ci_hi)/3+10**(-6)
        q_hi = (len(BAL)- total_ci_lo)/3-10**(-6)
        print(f"exhausted in round 3a CI=({total_ci_lo:.4f}, {total_ci_hi:.4f})")
        print(f"q_low = {q_lo:.4f}, q_high = {q_hi:.4f}")

if conf_intervals_3a[2][1] < q_lo and conf_intervals_3a[3][1] < q_lo and conf_intervals_3a[4][1] < conf_intervals_3a[2][0] and conf_intervals_3a[4][1] < conf_intervals_3a[3][0]:
    print(f"Step 3a successful: Nobody has quota and 4 is eliminated.")

gt_ci bounds before scaling: (0.0034, 0.0292)
(g+t) CI: (110.3348, 189.6652)
(m+1)T - t CI: (2787.2404, 3032.7596)
k CI: (0.949741, 1.061862)
Round 3a, Candidate 2, Total: CI=(733.3727, 796.3619)
Round 3a, Candidate 3, Total: CI=(795.2791, 855.3606)
Round 3a, Candidate 4, Total: CI=(341.8148, 398.8248)
exhausted in round 3a CI=(119.8185, 181.2614)
q_low = 962.9129, q_high = 983.3938
Step 3a successful: Nobody has quota and 4 is eliminated.


In [48]:
import math

u_bar = sum(gt_sample)/len(gt_sample)
v_bar = sum(Tt_sample)/len(Tt_sample)
t_votes = sum(1 for bal in CVR if project_FPV(bal, hopeful) == 1 and project_FPV(bal, hopeful_without1) == 6)
g_votes= sum(1 for bal in CVR if project_FPV(bal, hopeful) == 6)
c_u=(n-(g_votes+t_votes)+(m+1)*10**(-6))/len(CVR)
c_v =((m+1)*case3a_CVR_FPV[1]-t_votes)/len(CVR)
s_uv = sum( (gt_sample[i]-u_bar)*(Tt_sample[i]-v_bar) for i in range(len(gt_sample)) ) / (len(gt_sample)-1)
s_uu = sum( (gt_sample[i]-u_bar)**2 for i in range(len(gt_sample)) ) / (len(gt_sample)-1)
s_vv = sum( (Tt_sample[i]-v_bar)**2 for i in range(len(Tt_sample)) ) / (len(Tt_sample)-1)
k_hat = (c_u- u_bar) / (c_v + v_bar)

covava = (1-sample_size/len(CVR))/(len(CVR) * (c_v + v_bar)**2) * (s_uu + 2*k_hat *s_uv + k_hat**2 * s_vv)
SE = math.sqrt(covava)
print(SE)
k_ci_lo = k_hat - 1.96*SE
k_ci_hi = k_hat + 1.96*SE
print(f"Alternative k CI: ({k_ci_lo:.6f}, {k_ci_hi:.6f})")

0.008424723416940713
Alternative k CI: (0.987319, 1.020344)


# Repeat this process for a full deg-1 CI

Want to audit $M_{ij} = T_i - T_j$ after a first winner was elected (degree 1).

$M_{ij}= T_i - T_j + N\mu_0 + (1-k)(T_{wi} - T_{wj} + N\mu_1)$ where $k =\displaystyle\frac{c_u -\mu_u}{c_v+\mu_v}$.

$\vec{\theta}= (\mu_0,\mu_1, \mu_u,\mu_v)$.

In [51]:
import numpy as np

# Start with the audit for i = 3 vs j = 4
i, j = 2, 4
T_i, T_j = case3a_CVR_FPV[i], case3a_CVR_FPV[j]
T1_i, T1_j = 0, 0 # the CVR sees no votes transfer from 1 to anyone else
mu0_sample = [round3a_deg0_samples[i][k] - round3a_deg0_samples[j][k] for k in range(len(round3a_deg0_samples[i]))]
mu1_sample = [round3a_deg1_samples[i][k] - round3a_deg1_samples[j][k] for k in range(len(round3a_deg1_samples[i]))]
mu0_bar = sum(mu0_sample)/len(mu0_sample)
mu1_bar = sum(mu1_sample)/len(mu1_sample)
# u sample is gt_sample and v sample is Tt_sample

s_00 = sum( (mu0_sample[k]-mu0_bar)**2 for k in range(len(mu0_sample)) ) / (len(mu0_sample)-1)
s_11 = sum( (mu1_sample[k]-mu1_bar)**2 for k in range(len(mu1_sample)) ) / (len(mu1_sample)-1)
s_01 = sum( (mu0_sample[k]-mu0_bar)*(mu1_sample[k]-mu1_bar) for k in range(len(mu0_sample)) ) / (len(mu0_sample)-1)
s_u0 = sum( (mu0_sample[k]-mu0_bar)*(gt_sample[k]-u_bar) for k in range(len(mu0_sample)) ) / (len(mu0_sample)-1)
s_v0 = sum( (mu0_sample[k]-mu0_bar)*(Tt_sample[k]-v_bar) for k in range(len(mu0_sample)) ) / (len(mu0_sample)-1)
s_u1 = sum( (mu1_sample[k]-mu1_bar)*(gt_sample[k]-u_bar) for k in range(len(mu1_sample)) ) / (len(mu1_sample)-1)
s_v1 = sum( (mu1_sample[k]-mu1_bar)*(Tt_sample[k]-v_bar) for k in range(len(mu1_sample)) ) / (len(mu1_sample)-1)
# s_uu, s_vv, s_uv already computed above

S_theta = np.array([[s_00, s_01, s_u0, s_v0],
                    [s_01, s_11, s_u1, s_v1],
                    [s_u0, s_u1, s_uu, s_uv],
                    [s_v0, s_v1, s_uv, s_vv]])

SRSWOR_adjustment = (1 - sample_size/len(CVR))

cov_Mij = SRSWOR_adjustment / sample_size * S_theta

N = len(CVR)
c_u=(N-(g_votes+t_votes)+(m+1)*10**(-6))/N
c_v =((m+1)*case3a_CVR_FPV[1]-t_votes)/N
grad_T = np.array([N, (1-k_hat)*N, (T1_i-T1_j+N * mu1_bar)/(c_v + v_bar), k_hat*(T1_i - T1_j + N * mu1_bar)/(c_v + v_bar)])
var_T = grad_T @ cov_Mij @ grad_T.T
SE_T = math.sqrt(var_T)
print(f"Audit SE for M_{i}{j}: {SE_T:.4f}")
M_ij_hat = T_i - T_j + N * mu0_bar + (1 - k_hat)*(T1_i - T1_j + N * mu1_bar)
ci_lo = M_ij_hat - 1.96*SE_T
ci_hi = M_ij_hat + 1.96*SE_T
print(f"Audit CI for M_{i}{j}: ({ci_lo:.4f}, {ci_hi:.4f})")

Audit SE for M_24: 20.1358
Audit CI for M_24: (360.6487, 439.5811)


In [50]:
mu1_sample

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 -1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.

In [1]:
import numpy as np 

fpv_vec = np.array([1,2,2,5,1,2])
wt_vec = np.array([55, 20, 30, 23, 2, 1])
np.bincount(fpv_vec, weights=wt_vec)

array([ 0., 57., 51.,  0.,  0., 23.])