In [None]:
# 2023-07
# Null-hypothesis significance testing for a shuffled deck of cards:
# designing a rule of thumb that can be applied quickly,
# based on counting runs of red vs. black (or flipped vs. unflipped) cards.

In [15]:
import numpy as np
from collections import defaultdict

In [14]:
def maxrun_sample(n_red=26, n_black=26):
    """Shuffle a two-colour deck and return the length of its longest run.

    A "run" is a maximal stretch of consecutive cards of the same colour.

    Parameters
    ----------
    n_red, n_black : int
        Number of cards of each colour. The defaults (26 and 26) model a
        standard 52-card deck.

    Returns
    -------
    int
        Length of the longest same-colour run in the shuffled deck, or 0
        when the deck is empty.

    Notes
    -----
    The shuffle uses NumPy's global random state (``np.random.shuffle``), so
    call ``np.random.seed(...)`` beforehand for reproducible samples.
    """
    deck = [0] * n_red + [1] * n_black
    if not deck:
        # No cards means no runs; avoids indexing into an empty deck.
        return 0
    np.random.shuffle(deck)

    longest = current = 1
    # Walk adjacent pairs: extend the current run on a colour match,
    # otherwise start a new run of length 1.
    for prev, card in zip(deck, deck[1:]):
        current = current + 1 if card == prev else 1
        longest = max(longest, current)
    return longest

In [31]:
def allruns(n_red=26, n_black=26):
    """Shuffle a two-colour deck and return the lengths of all its runs.

    A "run" is a maximal stretch of consecutive cards of the same colour.

    Parameters
    ----------
    n_red, n_black : int
        Number of cards of each colour. The defaults (26 and 26) model a
        standard 52-card deck.

    Returns
    -------
    list of int
        Run lengths in deck order; they always sum to ``n_red + n_black``.
        An empty deck yields an empty list.

    Notes
    -----
    The shuffle uses NumPy's global random state (``np.random.shuffle``), so
    call ``np.random.seed(...)`` beforehand for reproducible samples.
    """
    deck = [0] * n_red + [1] * n_black
    if not deck:
        # No cards means no runs; avoids indexing into an empty deck.
        return []
    np.random.shuffle(deck)

    runs = [1]  # the first card always opens a run
    for prev, card in zip(deck, deck[1:]):
        if card == prev:
            runs[-1] += 1
        else:
            runs.append(1)
    return runs

In [38]:
# Sanity check: the run lengths of every shuffle must add up to the full deck.
{sum(allruns()) for _ in range(100)}

{52}

In [21]:
def tally(l):
    """Count how many times each distinct item occurs in an iterable.

    Parameters
    ----------
    l : iterable of hashable
        Items to count; generators are accepted and consumed once.

    Returns
    -------
    collections.defaultdict
        Maps each item to its number of occurrences. Looking up an item that
        never occurred yields 0 (and, as with any defaultdict, stores it).
    """
    # `int` as the factory (rather than a local lambda) keeps the result
    # picklable and gives it a readable repr.
    counts = defaultdict(int)
    for item in l:
        counts[item] += 1
    return counts

In [42]:
# Empirical distribution of the longest run over one million shuffles.
d = tally(
    maxrun_sample()
    for _ in range(1_000_000)
)

In [43]:
# Counts as percentages of the 1,000,000 samples (count / 10_000), for lengths 0-19.
{length: d[length] / 10_000 for length in range(20)}

{0: 0.0,
 1: 0.0,
 2: 0.0056,
 3: 2.5965,
 4: 20.0563,
 5: 30.9298,
 6: 23.1889,
 7: 12.7817,
 8: 6.0318,
 9: 2.6374,
 10: 1.0923,
 11: 0.4358,
 12: 0.1596,
 13: 0.0556,
 14: 0.0191,
 15: 0.0069,
 16: 0.0023,
 17: 0.0003,
 18: 0.0001,
 19: 0.0}

In [49]:
# Upper tail: number of shuffles whose longest run is 9 to 19 cards.
sum(d[length] for length in range(9, 20))

44094

In [50]:
# Lower tail: number of shuffles whose longest run is 3 cards or fewer.
sum(d[length] for length in range(4))

26021

In [61]:
# Same tabulation for the second-longest run of each shuffle (as percentages).
d = tally(sorted(allruns(), reverse=True)[1] for _ in range(1_000_000))
{length: d[length] / 10_000 for length in range(20)}

{0: 0.0,
 1: 0.0,
 2: 0.0838,
 3: 13.5687,
 4: 44.534,
 5: 29.0781,
 6: 9.5507,
 7: 2.4621,
 8: 0.5554,
 9: 0.1277,
 10: 0.0301,
 11: 0.0067,
 12: 0.0023,
 13: 0.0003,
 14: 0.0001,
 15: 0.0,
 16: 0.0,
 17: 0.0,
 18: 0.0,
 19: 0.0}

In [64]:
# Lower tail: number of shuffles whose second-longest run is 2 cards or fewer.
sum(d[length] for length in range(3))

838

In [63]:
# Upper tail: number of shuffles whose second-longest run is 7 to 19 cards.
sum(d[length] for length in range(7, 20))

31847

In [65]:
# Same tabulation for the third-longest run of each shuffle (as percentages).
d = tally(sorted(allruns(), reverse=True)[2] for _ in range(1_000_000))
{length: d[length] / 10_000 for length in range(20)}

{0: 0.0,
 1: 0.0,
 2: 0.6892,
 3: 36.5962,
 4: 48.6025,
 5: 12.3738,
 6: 1.5795,
 7: 0.1448,
 8: 0.0132,
 9: 0.0008,
 10: 0.0,
 11: 0.0,
 12: 0.0,
 13: 0.0,
 14: 0.0,
 15: 0.0,
 16: 0.0,
 17: 0.0,
 18: 0.0,
 19: 0.0}

In [67]:
# Percentage of shuffles whose third-longest run is not in {0, 1, 2, 6, 7, 8, 9};
# effectively lengths 3-5, since no length of 10 or more was observed above.
100 - sum(d[length] for length in (0, 1, 2, 6, 7, 8, 9)) / 10_000

97.5725

In [68]:
def okay(runs):
    """Rule of thumb: does this list of run lengths look like a fair shuffle?

    The deck passes when its longest run is shorter than 9 and its
    second-longest run is shorter than 7. In the simulations in this
    notebook, a longest run of 9+ occurred in about 4.4% of shuffles and a
    second-longest run of 7+ in about 3.2%.

    Parameters
    ----------
    runs : iterable of int
        Run lengths of one deck, in any order (e.g. the output of allruns()).

    Returns
    -------
    bool
        True if both thresholds are satisfied. A missing longest or
        second-longest run (fewer than two runs supplied) is treated as
        length 0 instead of raising IndexError.
    """
    ordered = sorted(runs, reverse=True)
    longest = ordered[0] if ordered else 0
    second = ordered[1] if len(ordered) > 1 else 0
    return longest < 9 and second < 7

In [69]:
# Estimate how often a fairly shuffled deck passes the rule of thumb.
tally(
    okay(allruns())
    for _ in range(1_000_000)
)

defaultdict(<function __main__.tally.<locals>.<lambda>()>,
            {True: 935975, False: 64025})