In [None]:
import pandas as pd
import numpy as np
from statsmodels.stats.proportion import proportions_ztest

In [None]:
# Simulate one experiment: the number of heads in 10 tosses of a fair coin.
# NOTE: no seed is set for this draw, so the value changes from run to run.

np.random.binomial(n=10, p=0.5)

2

In [67]:
# Run 10,000 independent experiments, each consisting of 10 tosses of a fair coin,
# and collect the number of heads observed in every experiment in `result`.

n = 10000
result = []
for experiment_id in range(n):
    # Re-seed the global generator with the experiment index so that every
    # individual draw is reproducible on its own.
    np.random.seed(experiment_id)
    heads_count = np.random.binomial(10, 0.5)
    result.append(heads_count)

In [71]:
# Wrap the simulated head counts in a single-column DataFrame.

heads_df = pd.DataFrame({'heads': result})

In [72]:
# For every experiment, measure how far the number of heads lies from the 5 heads
# expected of a perfectly fair coin (P(heads) = 0.5), ignoring the direction of the deviation.

heads_in_even_coin = 5
heads_df['heads_dev'] = (heads_df['heads'] - heads_in_even_coin).abs()

In [73]:
heads_df

Unnamed: 0,heads,heads_dev
0,5,0
1,5,0
2,5,0
3,5,0
4,8,3
...,...,...
9995,5,0
9996,6,1
9997,2,3
9998,4,1


In [75]:
# Scenario: we toss a coin 10 times and get 7 heads, i.e. a deviation of |7 - 5| = 2
# from the 5 heads expected of a fair coin.

# H₀ (null hypothesis): The coin is fair.
# H₁ (alternative hypothesis): The coin is not fair.
# If we accepted the alternative hypothesis on this evidence, we would make a false positive
# error (Type I error) in about 35.15% of cases in a two-tailed test: that is the share of
# simulated fair-coin experiments whose deviation is 2 or more in either direction.

# The threshold 2 below is the deviation observed in the scenario above.
print(f"The probability of accidentally getting such or a greater deviation from the rate of getting heads on a truly fair coin {(heads_df['heads_dev'] >= 2).mean()}")

The probability of not accidentally getting such or a greater deviation from the rate of getting heads on a truly fair coin 0.3515


# P-value scale for different heads deviations

In [82]:
# Every possible absolute deviation from 5 heads in a 10-toss experiment: 0 through 5.

heads_devs = list(range(6))

In [None]:
# For each candidate deviation, compute the share of simulated experiments whose
# absolute deviation is greater than or equal to that candidate.

result = [(dev, (heads_df['heads_dev'] >= dev).mean()) for dev in heads_devs]

In [None]:
# The larger the deviation threshold we require before rejecting the null hypothesis,
# the less often we reject it erroneously.
# NOTE: `columns` must be a flat list. A nested list ([['heads', 'p_value']]) makes pandas
# build MultiIndex columns, so heads_pvals['p_value'] would return a DataFrame, not a Series.
# The 'heads' column holds the absolute deviation in heads taken from `heads_devs`.

heads_pvals = pd.DataFrame(result, columns=['heads', 'p_value'])

In [80]:
heads_pvals

Unnamed: 0,heads,p_value
0,0,1.0
1,1,0.7573
2,2,0.3515
3,3,0.1124
4,4,0.0229
5,5,0.0009


# Satisfaction case

In [4]:
# Size of one sample; used as the number of trials in each simulated binomial draw.
sample_size = 115

In [5]:
# Simulate 10,000 samples of `sample_size` trials each with a success probability of 0.8,
# recording the number of successes per sample.
n = 10000
result = []
for experiment_id in range(n):
    # Re-seed with the experiment index so each individual draw is reproducible.
    np.random.seed(experiment_id)
    satisfied_count = np.random.binomial(sample_size, 0.8)
    result.append(satisfied_count)

In [6]:
# Wrap the simulated success counts in a single-column DataFrame.
sat_df = pd.DataFrame({'count_of_sat': result})

In [7]:
# Signed deviation of each simulated count from the expected count 0.8 * sample_size
# (negative values mean fewer successes than expected).
sat_df['sat_dev'] = sat_df['count_of_sat'].sub(0.8 * sample_size)

In [8]:
sat_df

Unnamed: 0,count_of_sat,sat_dev
0,92,0.0
1,93,1.0
2,93,1.0
3,92,0.0
4,84,-8.0
...,...,...
9995,93,1.0
9996,90,-2.0
9997,99,7.0
9998,96,4.0


In [9]:
# Share of simulated samples whose count fell 8 or more below the expected count.
sat_df['sat_dev'].le(-8).mean()

np.float64(0.0437)

# P-value scale for different satisfaction deviations

In [10]:
# Candidate shortfalls: every integer from 0 up to and including int(0.8 * sample_size).
max_shortfall = int(0.8 * sample_size)
sat_devs = list(range(max_shortfall + 1))


In [11]:
# For each candidate shortfall, compute the share of simulated samples whose signed
# deviation is at or below minus that shortfall (one-sided, lower tail).
result = [(dev, (sat_df['sat_dev'] <= -dev).mean()) for dev in sat_devs]

In [12]:
# Tabulate the simulated lower-tail shares: one row per shortfall ('people') with its share ('p_value').
# NOTE: `columns` must be a flat list. A nested list ([['people', 'p_value']]) makes pandas
# build MultiIndex columns, so pvals['p_value'] would return a DataFrame, not a Series.
pvals = pd.DataFrame(result, columns=['people', 'p_value'])

In [13]:
pvals.head(20)

Unnamed: 0,people,p_value
0,0,0.5348
1,1,0.4498
2,2,0.3632
3,3,0.2777
4,4,0.2067
5,5,0.1464
6,6,0.1013
7,7,0.0685
8,8,0.0437
9,9,0.0277


# P-value with statistical test

In [14]:
# One-sided z-test for 84 successes out of 115 observations against a hypothesised
# proportion of 0.8 (alternative: the true proportion is smaller).
# Element [1] of the returned pair is the p-value.
proportions_ztest(count=84, nobs=115, value=0.8, alternative='smaller')[1]

np.float64(0.046362399106540116)

In [19]:
# Start a fresh table with one row per shortfall value from 0 to 19.
data = {'people': list(range(20))}
pvals = pd.DataFrame(data)

In [None]:
def debug_proportions_ztest(x, nobs=115, value=0.8, expected_count=None):
    """Return the lower-tail z-test p-value for a shortfall of ``x`` successes.

    The observed count is taken to be ``expected_count - x`` and is tested
    against the hypothesised proportion ``value`` with the one-sided
    alternative that the true proportion is smaller.

    Parameters
    ----------
    x : int
        How many successes fewer than ``expected_count`` were observed.
    nobs : int, default 115
        Number of observations in the sample.
    value : float, default 0.8
        Proportion under the null hypothesis.
    expected_count : int, optional
        Number of successes expected under the null hypothesis. When omitted
        it is derived as ``round(value * nobs)``, which is 92 for the defaults.

    Returns
    -------
    float
        The p-value, i.e. element ``[1]`` of the ``proportions_ztest`` result.
    """
    if expected_count is None:
        # Keep the expected count in sync with nobs/value instead of hard-coding it.
        expected_count = round(value * nobs)
    count = expected_count - x
    return proportions_ztest(count, nobs, value=value, alternative='smaller')[1]

In [None]:
# Evaluate the z-test p-value for every shortfall listed in the 'people' column.
pvals['pval_ztest'] = pvals['people'].apply(debug_proportions_ztest)
     

In [25]:
print(pvals)

    people  pval_ztest
0        0    0.500000
1        1    0.409252
2        2    0.325579
3        3    0.251815
4        4    0.189428
5        5    0.138656
6        6    0.098803
7        7    0.068568
8        8    0.046362
9        9    0.030552
10      10    0.019627
11      11    0.012294
12      12    0.007509
13      13    0.004473
14      14    0.002598
15      15    0.001471
16      16    0.000812
17      17    0.000437
18      18    0.000229
19      19    0.000117
