## The Urn Model

In [1]:
import numpy as np

In [2]:
# Urn of five marbles: three labelled "b" and two labelled "w".
urn = ["b"] * 3 + ["w"] * 2


In [3]:
# Draw two marbles without replacement, twice, to show that repeated samples
# from the same urn can differ.
for draw_number in (1, 2):
    drawn = np.random.choice(urn, size=2, replace=False)
    print(f"Sample {draw_number}:", drawn)

Sample 1: ['b' 'b']
Sample 2: ['b' 'b']


In [6]:
# Estimate by simulation how often two marbles drawn without replacement
# carry the same label.
n = 10_000  # number of simulated two-marble draws
samples = [
    np.random.choice(urn, size=2, replace=False)
    for _ in range(n)
]
# Every sample holds exactly two marbles, so compare them by position.
is_matching = [pair[0] == pair[1] for pair in samples]
print(f"Proportion of samples with matching marbles: {np.mean(is_matching)}")


Proportion of samples with matching marbles: 0.4007


## Sampling Designs

In [7]:
from itertools import combinations

In [14]:
# Enumerate every unordered sample of 3 labels out of the 7 labels A-G;
# each combination is joined into a short string such as "ABC".
all_samples = list(map("".join, combinations("ABCDEFG", 3)))
print(all_samples)
print("Number of Samples:", len(all_samples))

['ABC', 'ABD', 'ABE', 'ABF', 'ABG', 'ACD', 'ACE', 'ACF', 'ACG', 'ADE', 'ADF', 'ADG', 'AEF', 'AEG', 'AFG', 'BCD', 'BCE', 'BCF', 'BCG', 'BDE', 'BDF', 'BDG', 'BEF', 'BEG', 'BFG', 'CDE', 'CDF', 'CDG', 'CEF', 'CEG', 'CFG', 'DEF', 'DEG', 'DFG', 'EFG']
Number of Samples: 35


In [15]:
from itertools import permutations

# List every ordering of the three letters (3! = 6 of them).
print(list(map("".join, permutations("ABC"))))

['ABC', 'ACB', 'BAC', 'BCA', 'CAB', 'CBA']


## Simulating the Sampling Distribution

In [16]:
# Seven units in the urn: four coded 1 and three coded 0.
# NOTE(review): the cells below report the mean of a sample as the proportion
# of failures, so 1 presumably marks a failure - confirm.
urn = [
    1, 1, 0, 1, 0, 1, 0,
]

In [20]:
# Draw one sample of three units without replacement; the mean of the 0/1
# codes is the proportion of units coded 1 in that sample.
sample = np.random.choice(urn, replace=False, size=3)
print(f"Sample:{sample}")
print(f"Pro Failures:{np.mean(sample)}")

Sample:[0 1 1]
Pro Failures:0.6666666666666666


In [24]:
# Repeat the three-unit draw 10,000 times and record, for each simulated
# sample, the proportion of units coded 1.
samples = [
    np.random.choice(urn, size=3, replace=False)
    for _ in range(10_000)
]
prop_failures = [draw.mean() for draw in samples]


In [28]:
import pandas as pd

In [29]:
# Tabulate the simulated sampling distribution: each distinct proportion and
# the share of simulated samples that produced it.
unique_els, counts_els = np.unique(prop_failures, return_counts=True)
pd.DataFrame({
    "Proportion of failures": unique_els,
    # Normalise by the observed number of samples instead of a hard-coded
    # 10_000, so the fractions still sum to 1 if the simulation size changes.
    "Fraction of samples": counts_els / counts_els.sum(),
})

Unnamed: 0,Proportion of failures,Fraction of samples
0,0.0,0.0266
1,0.333333,0.3444
2,0.666667,0.5137
3,1.0,0.1153


## Simulation with the Hypergeometric Distribution

In [30]:
# Simulate the counts directly instead of drawing each sample by hand: every
# entry is the number of "good" items among 3 drawn without replacement from
# a population of 4 "good" and 3 "bad" items.
simulations_fast = np.random.hypergeometric(ngood=4, nbad=3, nsample=3, size=10_000)
print(simulations_fast)

[2 1 2 ... 3 1 1]


In [31]:
# Tabulate the hypergeometric simulation: each distinct count and the share of
# simulated samples that produced it.
unique_els, counts_els = np.unique(simulations_fast, return_counts=True)
pd.DataFrame({
    "Number of failures": unique_els,
    # Normalise by the observed number of draws instead of a hard-coded
    # 10_000, so the fractions still sum to 1 if the simulation size changes.
    "Fraction of samples": counts_els / counts_els.sum(),
})

Unnamed: 0,Number of failures,Fraction of samples
0,0,0.0269
1,1,0.3501
2,2,0.5057
3,3,0.1173


In [35]:
from scipy.stats import hypergeom

# Exact sampling distribution, for comparison with the simulated fractions.
# scipy's hypergeom.pmf takes (k, M, n, N), which is a different ordering from
# numpy's hypergeometric(ngood, nbad, nsample):
#   M = population size, n = tagged items in the population, N = draws.
population_size = 7   # M: units in the urn
num_tagged = 4        # n: units counted as failures
sample_size = 3       # N: units drawn without replacement

# A sample of `sample_size` units can contain 0..sample_size failures.
num_failures = list(range(sample_size + 1))

exact_dist = pd.DataFrame({
    "Number of failures": num_failures,
    "Fraction of samples": hypergeom.pmf(
        num_failures, population_size, num_tagged, sample_size
    ),
})
exact_dist

Unnamed: 0,Number of failures,Fraction of samples
0,0,0.028571
1,1,0.342857
2,2,0.514286
3,3,0.114286
