**Problem statement:**

Suppose we have 100 senators, and A of them are against a bill, where A < 50.  A bill needs more than 50% of the votes cast to pass -- i.e. if the fraction of yeas among the senators who vote is $\leq 0.5$, the bill fails.  

Then, on the day of the vote, M senators chosen at random are sick, stuck in traffic, etc., and don't show up.  What is the probability that the bill passes?


In [1]:
import numpy as np
import numba
import time
# do this the regular way first (numba-jitted Python below), then re-run in Cython
# A = number of senators against the bill (odd naming convention, but it follows the book)
# total number of senators
# NOTE(review): the name is misspelled ("sentators") and nothing visible in this
# notebook reads it -- run_sim and cy_run_sim take r = 100 as a default argument instead
r_sentators = 100

# senators in favor of the bill = r - A
# M senators miss the vote
# is it binomial or something else here...
# in expectation (Wald's equality or something similar) it would be pro rata
# ugliest but most literal process would be to treat it like a deck of cards that we keep iterating through until we "deal"
# M different misses

@numba.jit(nopython=True)
def run_sim(A, M, r = 100, n_trials = 1000, min_threshold_to_pass = 0.5):
    """
    Return the Monte Carlo estimate of the fraction of trials in which the bill passes.

    Each trial draws, without replacement, the ``r - M`` senators who show up
    from the full chamber and counts how many of them are in favor.  The bill
    passes in that trial when the count is strictly greater than
    ``min_threshold_to_pass * (r - M)``.

    Parameters
    ----------
    A : int
        Number of senators against the bill (odd naming convention, but it
        follows the book).  Must satisfy ``0 <= A <= r``.
    M : int
        Number of senators who miss the vote -- in effect we condition on
        this.  Must satisfy ``0 <= M <= r``.
    r : int
        Total number of senators.
    n_trials : int
        Number of simulated votes; the result is divided by this value, so it
        must be non-zero.
    min_threshold_to_pass : float
        Fraction of the votes cast that the yeas must strictly exceed.

    Returns
    -------
    float
        Number of passing trials divided by ``n_trials``.

    Raises
    ------
    ValueError
        If ``A`` or ``M`` lies outside ``[0, r]``.
    """
    # Without these checks a negative `r - A` would be read as a "from the
    # end" slice bound below and silently mark the wrong number of senators
    # as being in favor.
    if A < 0 or A > r:
        raise ValueError("A must be between 0 and r")
    if M < 0 or M > r:
        raise ValueError("M must be between 0 and r")

    total_who_show = r - M
    total_in_favor = r - A

    # 1.0 marks a senator in favor, 0.0 a senator against
    senators_array = np.zeros(r)
    senators_array[:total_in_favor] = 1.0

    # the yea count has to be strictly greater than this for the bill to pass
    adjusted_threshold = total_who_show * min_threshold_to_pass

    successes = 0
    for _ in range(n_trials):
        # this could easily all be vectorized, but the goal seems to be to do this
        # in python, and ultimately get cython / c speedups
        votes_cast = np.random.choice(senators_array, total_who_show, replace=False)
        if np.sum(votes_cast) > adjusted_threshold:
            successes += 1
    return float(successes) / n_trials





In [2]:
Big_A = 45          # senators against the bill
Big_M = 51          # senators who miss the vote
N_trials = 1000000  # number of simulated votes

# time.clock() was removed in Python 3.8; time.perf_counter() is the
# replacement for interval timing.  This records the start time only --
# subtract it from a later perf_counter() reading to get elapsed seconds.
beg_time = time.perf_counter()
answer = run_sim(A= Big_A, M= Big_M, n_trials= N_trials)
print(answer)



0.837368


In [3]:
# the cython version is below
%load_ext Cython

In [4]:
%%cython

import numpy as np
cimport numpy as np
np.import_array()
import cython
from libc.stdlib cimport rand, RAND_MAX

# @cython.boundscheck(False)
# @cython.wraparound(False)
# @cython.cdivision(True)
def cy_run_sim(int A, int M, int r = 100, int n_trials = 1000, double min_threshold_to_pass = 0.5):
    """
    Monte Carlo estimate of the fraction of trials in which the bill passes
    (Cython counterpart of run_sim).

    A = number of senators against the bill (odd naming convention, but from the book)
    M = total who miss the vote -- we in effect are conditioning on this
    r = total number of senators
    n_trials = number of simulated votes (the result is divided by this value)
    min_threshold_to_pass = fraction of the votes cast that the yeas must
        strictly exceed for the bill to pass

    Returns successes / n_trials as a Python float.
    Raises ValueError if A or M lies outside [0, r].

    Random numbers come from C rand(); this function never calls srand(), so
    seeding is left to the surrounding environment.
    """
    cdef int total_who_show
    cdef int total_in_favor
    # int64_t matches the dtype=np.int64 allocation below on every platform
    # (np.int_t is a C long, which is not always 64 bits wide)
    cdef np.ndarray[np.int64_t, ndim= 1, negative_indices= False, mode = 'c'] senators_array
    cdef int my_idx
    cdef int last_idx
    cdef int i
    cdef int trial
    cdef int successes = 0
    cdef np.int64_t drawn
    cdef np.int64_t running_count
    cdef double effective_threshold

    # Without these checks M > r would skip the draw loop and count every
    # trial as a pass, and A > r would silently leave nobody in favor.
    if A < 0 or A > r:
        raise ValueError("A must be between 0 and r")
    if M < 0 or M > r:
        raise ValueError("M must be between 0 and r")

    total_who_show = r - M
    total_in_favor = r - A
    effective_threshold = total_who_show * min_threshold_to_pass

    # 1 marks a senator in favor, 0 a senator against
    senators_array = np.zeros(r, dtype= np.int64)
    for i in range(total_in_favor):
        senators_array[i] = 1

    for trial in range(n_trials):
        running_count = 0
        # Partial shuffle: on step i pick uniformly among the first (r - i)
        # slots, count that senator's vote, then swap it into slot r - i - 1 so
        # it cannot be drawn again in this trial.  The array is deliberately not
        # reset between trials; only the order changes, never the contents.
        for i in range(total_who_show):
            last_idx = r - i - 1
            # divide by RAND_MAX + 1.0 so the scaled value stays strictly below
            # (r - i); dividing by RAND_MAX alone yields index r - i whenever
            # rand() returns RAND_MAX, which is past the undrawn region
            my_idx = <int>((r - i) * (rand() / (RAND_MAX + 1.0)))
            drawn = senators_array[my_idx]
            running_count += drawn
            # the swap
            senators_array[my_idx] = senators_array[last_idx]
            senators_array[last_idx] = drawn

        if running_count > effective_threshold:
            successes += 1

    return float(successes) / n_trials


In [5]:
answer = cy_run_sim(A = Big_A, M = Big_M, n_trials = N_trials)
answer

0.838634


Timed comparisons of the numba and Cython simulations are below.


In [6]:
# NOTE(review): the two numba timings that follow use M = 10, while the two
# Cython timings after them use M = 51, so those pairs are not like-for-like;
# only the final M = 78 runs time both versions at the same M.
Big_M = 10

In [7]:
%timeit run_sim(A = Big_A, M = Big_M, n_trials = N_trials)
# numba sim

1 loop, best of 3: 2.24 s per loop


In [8]:
%timeit run_sim(A = Big_A, M = Big_M, n_trials = N_trials)
# numba sim

1 loop, best of 3: 2.25 s per loop


In [9]:
# back to the M used for the accuracy check in cell 2
Big_M = 51

In [10]:
%timeit cy_run_sim(A = Big_A, M = Big_M, n_trials = N_trials)
# cython sim

1 loop, best of 3: 1.82 s per loop


In [11]:
%timeit cy_run_sim(A = Big_A, M = Big_M, n_trials = N_trials)
# cython sim

1 loop, best of 3: 1.77 s per loop


In [12]:
# larger M: only r - M senators show up, so each trial draws fewer votes
Big_M = 78

In [13]:
%timeit run_sim(A = Big_A, M = Big_M, n_trials = N_trials)
# numba sim

1 loop, best of 3: 868 ms per loop


In [14]:
%timeit cy_run_sim(A = Big_A, M = Big_M, n_trials = N_trials)
# cython sim

1 loop, best of 3: 805 ms per loop
