In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
%matplotlib inline
import numpy as np
import scipy as sp
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import IPython as ip
mpl.style.use('ggplot')
mpl.rc('font', family='Noto Sans CJK TC')
ip.display.set_matplotlib_formats('svg')

In [3]:
# Sample 1 is drawn from a normal distribution with this mean (`loc`) and
# standard deviation (`scale`); both are passed to `sp.stats.norm.rvs` below.
loc, scale = 170, 5

In [4]:
# --- test parameters ---

# P(actual negative): prior probability used in the FDR / FOR formulas.
p_an = 0.5

# alpha = P(predicted + | actual -); the confidence level is cl = 1 - alpha = 0.95.
alpha = 0.05

# beta = P(predicted - | actual +); the power is 1 - beta = 0.80.
beta = 0.20

# Difference between the two sample means, in raw (unstandardized) units.
raw_effect_size = 2

# Number of observations in each of the two samples.
sample_size_1 = sample_size_2 = 100

## FDR & FOR

$
\text{FDR} = P(\text{actual negative} \mid \text{predicted positive}) = \dfrac{ \alpha\,P(\text{actual negative}) }{P(\text{predicted positive})}
$

$
\text{FOR} = P(\text{actual positive} \mid \text{predicted negative}) = \dfrac{ \beta\,P(\text{actual positive}) }{P(\text{predicted negative})}
$

In [5]:
# Worked example with a different parameter set (uncomment to try it):
# alpha = 0.05
# beta = 0.05
# p_an = 0.94
# # FDR -> 0.4519
# # FOR -> 0.0033

# Complements of the three inputs.
power = 1 - beta
cl = 1 - alpha
p_ap = 1 - p_an

# Marginal probability of each prediction: sum over the two actual states.
p_pp = (alpha * p_an) + (power * p_ap)
p_pn = (cl * p_an) + (beta * p_ap)

# False discovery rate = P(actual - | predicted +)
fdr = (alpha * p_an) / p_pp
# False omission rate = P(actual + | predicted -)
for_ = (beta * p_ap) / p_pn

display(fdr)
display(for_)

0.058823529411764705

0.1739130434782609

## Using the Solver in Statsmodels

### Calculate Beta

In [6]:
%%time
# Solve for the power of the two-sample t-test (`power=None` marks the
# unknown) and display its complement, beta.
1 - sm.stats.tt_ind_solve_power(
    effect_size=raw_effect_size / scale,  # standardized effect size
    nobs1=sample_size_1,
    alpha=alpha,
    power=None,
    ratio=sample_size_2 / sample_size_1,
)

CPU times: user 1.4 ms, sys: 241 µs, total: 1.64 ms
Wall time: 1.51 ms


0.19635250345692312

### Calculate Sample Size

In [7]:
%%time
# Solve for the size of sample 1 (`nobs1=None` marks the unknown) that reaches
# the target power at the given alpha and effect size.
sm.stats.tt_ind_solve_power(
    # standardized effect size
    # see also: https://en.wikipedia.org/wiki/Effect_size#Cohen's_d
    effect_size=raw_effect_size / scale,
    nobs1=None,
    alpha=alpha,
    power=1 - beta,
    ratio=1,  # = sample_size_2 / sample_size_1
)

CPU times: user 11.8 ms, sys: 2.32 ms, total: 14.1 ms
Wall time: 12.4 ms


99.08032683981143

### Calculate Effect Size

In [8]:
%%time
# Solve for the standardized effect size (`effect_size=None` marks the
# unknown), then multiply by `scale` to express it in raw units.
scale * sm.stats.tt_ind_solve_power(
    effect_size=None,
    nobs1=sample_size_1,
    alpha=alpha,
    power=1 - beta,
    ratio=sample_size_2 / sample_size_1,
)

CPU times: user 11.8 ms, sys: 2.67 ms, total: 14.5 ms
Wall time: 12.4 ms


1.9906955869556378

## Using Simulation

In [9]:
# Number of simulated experiments; used as the second dimension of every
# simulated sample array and as the denominator of the observed beta.
simulation_n = 1000

### Calculate Beta

In [10]:
%%time
# Fixed seed so the simulated samples are the same on every run.
np.random.seed(20180702)

# Rows are observations, columns are independent simulated experiments.
sample_1 = sp.stats.norm.rvs(
    size=(sample_size_1, simulation_n), loc=loc, scale=scale
)
sample_2 = sp.stats.norm.rvs(
    size=(sample_size_2, simulation_n), loc=loc + raw_effect_size, scale=scale
)

# Observed beta: fraction of experiments whose t-test p-value is not below
# alpha, i.e. the test fails to detect the shift of `raw_effect_size`.
observed_beta = np.mean(sp.stats.ttest_ind(sample_1, sample_2).pvalue >= alpha)
print(observed_beta)

0.214
CPU times: user 14.9 ms, sys: 2.69 ms, total: 17.6 ms
Wall time: 17.4 ms


### Calculate Sample Size

In [11]:
def calc_beta_given_sample_size(x):
    """Estimate beta by simulation when both samples have ``int(x)`` rows.

    Draws ``simulation_n`` pairs of normal samples (means ``loc`` and
    ``loc + raw_effect_size``, common standard deviation ``scale``), runs a
    two-sample t-test on every pair and returns the fraction of pairs whose
    p-value is not below ``alpha``.

    Parameters
    ----------
    x : float or int
        Sample size per group. It is truncated with ``int()``, so the result
        is a step function of ``x``.

    Returns
    -------
    float
        Observed beta, between 0 and 1.

    Notes
    -----
    A private ``RandomState`` seeded with 20180702 is used instead of
    re-seeding the global NumPy generator. The draws are the same as with
    ``np.random.seed(20180702)``, but calling this function (for example many
    times from a root finder) no longer resets the global random state.
    """
    n_obs = int(x)
    rng = np.random.RandomState(20180702)
    # Both samples share one generator so sample_2 continues the same stream
    # right after sample_1.
    sample_1 = sp.stats.norm.rvs(
        loc=loc, scale=scale, size=(n_obs, simulation_n), random_state=rng
    )
    sample_2 = sp.stats.norm.rvs(
        loc=loc + raw_effect_size, scale=scale, size=(n_obs, simulation_n), random_state=rng
    )
    p_values = sp.stats.ttest_ind(sample_1, sample_2).pvalue
    observed_beta = (p_values >= alpha).sum() / simulation_n
    return observed_beta

In [12]:
%%time
# === Find the sample size x, bracketed by 80 and 120, at which the simulated
# beta equals the target beta.
# calc_beta_given_sample_size truncates x with int(), so the objective is a
# step function of x; only the integer part of the returned root is meaningful.
sp.optimize.brentq(
    lambda n: calc_beta_given_sample_size(n) - beta,
    a=120,
    b=80,
)

CPU times: user 57.4 ms, sys: 4.35 ms, total: 61.7 ms
Wall time: 61 ms


103.5522773372672

### Calculate Raw Effect Size

In [13]:
def calc_beta_given_raw_effect_size(x):
    """Estimate beta by simulation when the true mean difference is ``x``.

    Draws ``simulation_n`` pairs of normal samples with ``sample_size_1`` and
    ``sample_size_2`` rows (means ``loc`` and ``loc + x``, common standard
    deviation ``scale``), runs a two-sample t-test on every pair and returns
    the fraction of pairs whose p-value is not below ``alpha``.

    Parameters
    ----------
    x : float
        Raw (unstandardized) difference between the two population means.

    Returns
    -------
    float
        Observed beta, between 0 and 1.

    Notes
    -----
    A private ``RandomState`` seeded with 20180702 is used instead of
    re-seeding the global NumPy generator. The draws are the same as with
    ``np.random.seed(20180702)``, but calling this function (for example many
    times from a root finder) no longer resets the global random state.
    """
    rng = np.random.RandomState(20180702)
    # Both samples share one generator so sample_2 continues the same stream
    # right after sample_1.
    sample_1 = sp.stats.norm.rvs(
        loc=loc, scale=scale, size=(sample_size_1, simulation_n), random_state=rng
    )
    sample_2 = sp.stats.norm.rvs(
        loc=loc + x, scale=scale, size=(sample_size_2, simulation_n), random_state=rng
    )
    p_values = sp.stats.ttest_ind(sample_1, sample_2).pvalue
    observed_beta = (p_values >= alpha).sum() / simulation_n
    return observed_beta

In [14]:
%%time
# Find the raw effect size, bracketed by 0 and 3, at which the simulated beta
# equals the target beta.
sp.optimize.brentq(
    lambda delta: calc_beta_given_raw_effect_size(delta) - beta,
    a=3,
    b=0,
)

CPU times: user 88.1 ms, sys: 3.82 ms, total: 91.9 ms
Wall time: 96.4 ms


2.0355119938176798