Skip to content

Latest commit

 

History

History
452 lines (396 loc) · 10.7 KB

sig_pow.md

File metadata and controls

452 lines (396 loc) · 10.7 KB

Reproduce the "R from Python" plot in Python using plotnine. This is a line for line tranlation of significance_power_visuals.R.

# import our modules
import numpy as np
import pandas as pd
from IPython.display import display, HTML
from scipy.stats import norm
from data_algebra.cdata import RecordSpecification
from plotnine import (
    aes, 
    ggplot, 
    geom_line, geom_point, geom_ribbon, geom_vline,
    scale_color_manual, scale_fill_manual,
    xlab, ylab,
    ggtitle,
    theme
)
# our parameters of interest
n = 557
r = 0.1
t = 0.061576
power = 0.9
significance = 0.02
# convert to what were the function arguments
stdev = np.sqrt(0.5 / n)
effect_size = r
threshold = t
title='Area under the tails give you significance and (1-power)'
subtitle = 'Significance: control right tail; (1-Power): treatment left tail'
eps=1e-6
control_color='#d95f02'
treatment_color='#1b9e77'
# define the wide plotting data
x = set(np.arange(-5 * stdev, 5 * stdev + effect_size, step=stdev / 100))
x.update([threshold, threshold-eps, threshold+eps])
x = sorted(x)

pframe = pd.DataFrame({
    'x': x,
    'control': norm.pdf(x, loc=0, scale=stdev),
    'treatment': norm.pdf(x, loc=effect_size, scale=stdev),
})
# control's right tail
pframe['control_tail'] = np.where(pframe['x'] > threshold, pframe['control'], 0)
# treatment's left tail
pframe['treatment_tail'] = np.where(pframe['x'] <= threshold, pframe['treatment'], 0)

pframe
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
x control treatment control_tail treatment_tail
0 -0.149805 4.962170e-05 1.069071e-14 0.000000e+00 1.069071e-14
1 -0.149506 5.216325e-05 1.161969e-14 0.000000e+00 1.161969e-14
2 -0.149206 5.482949e-05 1.262815e-14 0.000000e+00 1.262815e-14
3 -0.148907 5.762625e-05 1.372275e-14 0.000000e+00 1.372275e-14
4 -0.148607 6.055961e-05 1.491073e-14 0.000000e+00 1.491073e-14
... ... ... ... ... ...
1332 0.248377 1.588927e-14 6.290411e-05 1.588927e-14 0.000000e+00
1333 0.248677 1.462444e-14 5.986178e-05 1.462444e-14 0.000000e+00
1334 0.248977 1.345895e-14 5.696089e-05 1.345895e-14 0.000000e+00
1335 0.249276 1.238510e-14 5.419515e-05 1.238510e-14 0.000000e+00
1336 0.249576 1.139579e-14 5.155855e-05 1.139579e-14 0.000000e+00

1337 rows × 5 columns

# convert from to long for for plotting using the data algebra

# specify the cdata record transform
record_transform = RecordSpecification(
    pd.DataFrame({
        'group': ['treatment', 'control'],
        'y': ['treatment', 'control'],
        'tail': ['treatment_tail', 'control_tail'],
    }),
    record_keys=['x'],
    control_table_keys=['group'],
).map_from_rows()


record_transform

RecordMap: transforming records of the form:

<style type="text/css"> #T_2e174_row0_col0 { background-color: #FFE4C4; } </style>
  record id value
  x treatment control treatment_tail control_tail
0 x record key treatment value control value treatment_tail value control_tail value

to records of the form:
<style type="text/css"> #T_9fd51_row0_col0, #T_9fd51_row1_col0 { background-color: #FFE4C4; } #T_9fd51_row0_col1, #T_9fd51_row1_col1 { background-color: #7FFFD4; } </style>
  record id record structure value
  x group y tail
0 x record key control control value control_tail value
1 x record key treatment treatment value treatment_tail value

For details on the record transform please see here.

# apply the record transform
pframelong = record_transform(pframe)

pframelong
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
x group y tail
0 -0.149805 control 4.962170e-05 0.000000e+00
1 -0.149805 treatment 1.069071e-14 1.069071e-14
2 -0.149506 control 5.216325e-05 0.000000e+00
3 -0.149506 treatment 1.161969e-14 1.161969e-14
4 -0.149206 control 5.482949e-05 0.000000e+00
... ... ... ... ...
2669 0.248977 treatment 5.696089e-05 0.000000e+00
2670 0.249276 control 1.238510e-14 1.238510e-14
2671 0.249276 treatment 5.419515e-05 0.000000e+00
2672 0.249576 control 1.139579e-14 1.139579e-14
2673 0.249576 treatment 5.155855e-05 0.000000e+00

2674 rows × 4 columns

# make the plot using the plotnine implementation 
# of Leland Wilkinson's Grammar of Graphics
# (nearly call equiv to Hadley Wickham ggplot2 realization)
palette = {'control': control_color, 'treatment': treatment_color}
p = (
    ggplot(pframelong, aes(x='x', y='y'))
        + geom_line(aes(color='group'))
        + geom_vline(xintercept=threshold, 
                     color='#7f7f7f', 
                     # linewidth=1,
                     )
        + geom_ribbon(aes(ymin=0, ymax='tail', fill='group'), alpha = 0.5)
        + scale_color_manual(values=palette)
        + scale_fill_manual(values=palette)
        + theme(legend_position='none')
        + ylab('density')
        + xlab('observed difference')
        + ggtitle(
            title 
            + "\n" + subtitle,
            )
    )

p

png

<Figure Size: (640 x 480)>