<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#FDR-&amp;-FOR" data-toc-modified-id="FDR-&amp;-FOR-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>FDR &amp; FOR</a></span></li><li><span><a href="#Using-the-Solver-in-Statsmodels" data-toc-modified-id="Using-the-Solver-in-Statsmodels-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Using the Solver in Statsmodels</a></span><ul class="toc-item"><li><span><a href="#Calculate-Beta" data-toc-modified-id="Calculate-Beta-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Calculate Beta</a></span></li><li><span><a href="#Calculate-Sample-Size" data-toc-modified-id="Calculate-Sample-Size-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Calculate Sample Size</a></span></li><li><span><a href="#Calculate-Effect-Size" data-toc-modified-id="Calculate-Effect-Size-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Calculate Effect Size</a></span></li></ul></li><li><span><a href="#Using-Simulation" data-toc-modified-id="Using-Simulation-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Using Simulation</a></span><ul class="toc-item"><li><span><a href="#Calculate-Beta" data-toc-modified-id="Calculate-Beta-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Calculate Beta</a></span></li><li><span><a href="#Calculate-Sample-Size" data-toc-modified-id="Calculate-Sample-Size-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Calculate Sample Size</a></span></li><li><span><a href="#Calculate-Raw-Effect-Size" data-toc-modified-id="Calculate-Raw-Effect-Size-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Calculate Raw Effect Size</a></span></li></ul></li></ul></div>

In [1]:
import warnings
# Silence every warning for the rest of the session so rendered outputs stay clean.
# Note that this also hides deprecation notices from the libraries used below.
warnings.filterwarnings('ignore')

In [2]:
%matplotlib inline
import numpy as np
import scipy as sp
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import IPython as ip
# Plot styling: ggplot theme, a custom font family, and SVG inline figures.
mpl.style.use('ggplot')
# NOTE(review): presumably requires the 'Noto Sans CJK TC' font to be installed locally — confirm.
mpl.rc('font', family='Noto Sans CJK TC')
# NOTE(review): newer IPython releases deprecate this helper in favour of
# matplotlib_inline.backend_inline.set_matplotlib_formats — confirm against the installed version.
ip.display.set_matplotlib_formats('svg')

In [3]:
# Population parameters of sample 1: mean (loc) and standard deviation (scale)
# of the normal distribution the simulations draw from.
loc = 170
scale = 5

In [4]:
# test parameters
anr = 0.5  # actual negative rate = P(actual -)
alpha = 0.05  # = P(predicted + | actual -)
beta = 0.20  #  = P(predicted - | actual +)
# cl = 0.95  # = 1-alpha
# power = 0.80  # = 1-beta
raw_effect_size = 2  # difference between the two population means, in the same unit as loc
sample_size_1 = 100
sample_size_2 = 100

# FDR & FOR

$
\text{false discovery rate} = \dfrac{ \alpha \cdot \text{actual negative rate} }{\text{predicted positive rate}} \\
$

$
\text{false omission rate} = \dfrac{ \beta \cdot \text{actual positive rate} }{\text{predicted negative rate}} \\
$

In [5]:
# Alternative scenario to try (uncomment to override the parameters defined earlier):
# alpha = 0.05
# beta = 0.05
# anr = 0.94
# # FDR -> 0.4519
# # FOR -> 0.0033

apr = 1-anr  # actual positive rate = P(actual +)
power = 1-beta  # = P(predicted + | actual +)
cl = 1-alpha  # = P(predicted - | actual -)

# Law of total probability over the actual class.
p_pp = alpha*anr + power*apr  # predicted positive rate
p_pn = cl*anr + beta*apr  # predicted negative rate

fdr = alpha*anr / p_pp  # = P(actual - | predicted +)
for_ = beta*apr / p_pn  # = P(actual + | predicted -)

display(fdr)
display(for_)

0.058823529411764705

0.1739130434782609

# Using the Solver in Statsmodels

## Calculate Beta

In [6]:
%%time
# Analytical beta for the independent two-sample t-test: power=None is the unknown
# the solver returns, and 1 - power is beta.
1-sm.stats.tt_ind_solve_power(
    alpha=alpha,
    effect_size=raw_effect_size/scale,  # standardized effect size (raw difference / standard deviation)
    nobs1=sample_size_1,
    ratio=sample_size_2/sample_size_1,
    power=None,
)

CPU times: user 1.57 ms, sys: 516 µs, total: 2.09 ms
Wall time: 1.73 ms


0.19635250345692312

## Calculate Sample Size

In [7]:
%%time
# Analytical size of sample 1 needed to reach the target power:
# nobs1=None is the unknown the solver returns.
sm.stats.tt_ind_solve_power(
    alpha=alpha,
    power=1-beta,
    # standardized effect size
    # see also: https://en.wikipedia.org/wiki/Effect_size#Cohen's_d
    effect_size=raw_effect_size/scale,
    ratio=1, # = sample_size_2 / sample_size_1
    nobs1=None,
)

CPU times: user 11.7 ms, sys: 3.24 ms, total: 15 ms
Wall time: 12.2 ms


99.08032683981143

## Calculate Effect Size

In [8]:
%%time
# Analytical detectable effect: effect_size=None is the unknown the solver returns.
# The parameter is the standardized effect size (raw / scale elsewhere in this notebook),
# so multiplying the result by scale converts it back to raw units.
sm.stats.tt_ind_solve_power(
    alpha=alpha,
    power=1-beta,
    effect_size=None,
    nobs1=sample_size_1,
    ratio=sample_size_2/sample_size_1,
)*scale

CPU times: user 10.5 ms, sys: 3.53 ms, total: 14 ms
Wall time: 10.8 ms


1.9906955869556378

# Using Simulation

In [9]:
# Number of simulated experiments; each experiment is one column of the sample arrays.
simulation_n = 1000

## Calculate Beta

In [10]:
%%time
# Monte Carlo estimate of beta: draw simulation_n pairs of samples whose population means
# differ by raw_effect_size, t-test every pair, and count how often the test fails to reject.
np.random.seed(20180702)  # fixed seed so the printed estimate is reproducible
sample_1 = sp.stats.norm.rvs(loc=loc, scale=scale, size=(sample_size_1, simulation_n))
sample_2 = sp.stats.norm.rvs(loc=loc+raw_effect_size, scale=scale, size=(sample_size_2, simulation_n))
# pvalue >= alpha is a "predicted negative" although the effect is real, i.e. a type II error.
observed_beta = (sp.stats.ttest_ind(sample_1, sample_2).pvalue >= alpha).sum() / simulation_n
print(observed_beta)

0.214
CPU times: user 14.8 ms, sys: 2.86 ms, total: 17.6 ms
Wall time: 15.1 ms


## Calculate Sample Size

In [11]:
def calc_beta_given_sample_size(x, seed=20180702):
    """Estimate beta (the type II error rate) by simulation for a given sample size.

    Draws ``simulation_n`` pairs of normal samples with ``int(x)`` observations
    per group, population means ``loc`` and ``loc + raw_effect_size`` and standard
    deviation ``scale``, runs an independent two-sample t-test on every pair, and
    returns the share of pairs whose p-value is not below ``alpha``.

    Parameters
    ----------
    x : float or int
        Sample size of each group. It is truncated with ``int`` so that a root
        finder may pass floating-point candidates.
    seed : int, optional
        Seed of the private random generator. Keeping it fixed makes the function
        deterministic, so repeated calls with the same ``x`` return the same value.

    Returns
    -------
    float
        Fraction of the simulated tests with ``pvalue >= alpha``.
    """
    group_size = int(x)
    # A private RandomState produces the same draws as seeding the global generator
    # with the same seed, but leaves NumPy's global random state untouched.
    rng = np.random.RandomState(seed)
    sample_1 = sp.stats.norm.rvs(
        loc=loc, scale=scale, size=(group_size, simulation_n), random_state=rng
    )
    sample_2 = sp.stats.norm.rvs(
        loc=loc+raw_effect_size, scale=scale, size=(group_size, simulation_n), random_state=rng
    )
    observed_beta = (sp.stats.ttest_ind(sample_1, sample_2).pvalue >= alpha).sum() / simulation_n
    return observed_beta

In [12]:
%%time
# === given observed_beta = beta, find the x between 80 and 120
# brentq requires the function to have opposite signs at the two bracket endpoints.
sp.optimize.brentq(
    lambda x: calc_beta_given_sample_size(x) - beta,
    120, 80
)

CPU times: user 53.5 ms, sys: 4.16 ms, total: 57.7 ms
Wall time: 54.6 ms


103.5522773372672

## Calculate Raw Effect Size

In [13]:
def calc_beta_given_raw_effect_size(x, seed=20180702):
    """Estimate beta (the type II error rate) by simulation for a given raw effect size.

    Draws ``simulation_n`` pairs of normal samples with ``sample_size_1`` and
    ``sample_size_2`` observations, population means ``loc`` and ``loc + x`` and
    standard deviation ``scale``, runs an independent two-sample t-test on every
    pair, and returns the share of pairs whose p-value is not below ``alpha``.

    Parameters
    ----------
    x : float
        Raw (unstandardized) difference between the two population means.
    seed : int, optional
        Seed of the private random generator. Keeping it fixed makes the function
        deterministic, so repeated calls with the same ``x`` return the same value.

    Returns
    -------
    float
        Fraction of the simulated tests with ``pvalue >= alpha``.
    """
    # A private RandomState produces the same draws as seeding the global generator
    # with the same seed, but leaves NumPy's global random state untouched.
    rng = np.random.RandomState(seed)
    sample_1 = sp.stats.norm.rvs(
        loc=loc, scale=scale, size=(sample_size_1, simulation_n), random_state=rng
    )
    sample_2 = sp.stats.norm.rvs(
        loc=loc+x, scale=scale, size=(sample_size_2, simulation_n), random_state=rng
    )
    observed_beta = (sp.stats.ttest_ind(sample_1, sample_2).pvalue >= alpha).sum() / simulation_n
    return observed_beta

In [14]:
%%time
sp.optimize.brentq(
    lambda x: calc_beta_given_raw_effect_size(x) - beta,
    3, 0
)

CPU times: user 107 ms, sys: 6.99 ms, total: 114 ms
Wall time: 116 ms


2.0355119938176798