This notebook will examine how well the fitted effect sizes compare to the empirical effect sizes for parametric data.

1. Calculate the extrapolated power from the simulation.
2. Compare the effect size from the traditional power for the three methods of extrapolation.
    * Look at the regression of all effects
    * Look for values at tails (i.e. is there a maximum value for power which should be excluded?)
    * Check the ratio between fitting methods
3. Look at the effect size from empirical models for the three methods. Look for noise, etc.

In [1]:
import copy
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
import scipy
import statsmodels.api as sms
import statsmodels.formula.api as smf

import emp_power.traditional as trad
import emp_power.effects as eff
import emp_power.plot as plot

% matplotlib inline
sn.set_style('ticks')

In [2]:
# --- Simulation bookkeeping ---
# Number of simulation rounds read for every test (simulation_0 ... simulation_99)
num_rounds = 100
# When True, cached effect size files are ignored and recalculated
overwrite = True

# --- Statistical settings ---
alpha = 0.05
# NOTE: `tests` is redefined with the full list of tests further down
tests = ['ttest_ind']
# Sample sizes 5, 15, ..., 95
counts = np.arange(start=5, stop=100, step=10)

In [3]:
# Root folder of the simulation results; every path used below is built
# relative to it.
sim_location = './simulations/'

# Fail fast when the simulations have not been generated yet
if not os.path.exists(sim_location):
    raise ValueError(
        'The simulations do not exist. Go back and simulate some data!'
    )

In [4]:
# Keys of the simulated tests. They are used as sub-folder names when the
# simulation files are read and as row labels in the comparison figures.
tests = ['ttest_ind', 'ttest_1', 'anova_3', 'anova_8', 'correlation']
# Display names, listed in the same order as `tests`.
# NOTE(review): assumes 'ttest_ind' is the independent (two sample) t test,
# as in scipy.stats.ttest_ind, and 'ttest_1' the one sample t test. The first
# two names were previously listed the other way around -- confirm against
# the notebook that generated the simulations.
names = ['Two Sample T Test', 'One sample T Test', 'ANOVA (3 groups)', 'ANOVA (8 groups)', 'correlation']

In [5]:
def calc_power_summary(counts, power, alpha=0.05):
    """Extrapolates effect sizes from power using three fitting methods

    Parameters
    ----------
    counts, power
        Passed unchanged to each effect size function. Presumably the
        sample sizes and the power observed at those sizes -- confirm
        against `emp_power.effects`.
    alpha : float, optional
        Passed to each effect size function as the `alpha` keyword.

    Returns
    -------
    tuple
        The results of `eff.z_effect`, `eff.t_effect` and `eff.f_effect`,
        in that order.
    """
    estimators = (eff.z_effect, eff.t_effect, eff.f_effect)
    return tuple(
        estimate(counts, power, alpha=alpha) for estimate in estimators
    )

Let's start by looking at effect sizes extrapolated from test-based methods. Since traditional power is a one-dimensional array and the effect size calculation returns a two-dimensional array, we will take the first observation in the array.

In [None]:
# Extrapolated effect sizes, keyed by test name and then by simulation round
trad_summary = {}
for test_name in tests:
    trad_summary[test_name] = {}

    # Input directory (power simulations) and output directory (cached
    # effect sizes) for this test
    power_dir = os.path.join(sim_location, 'emperical_power/%s' % test_name)
    trad_dir = os.path.join(sim_location, 'extrapolated_effect/test-based/%s' % test_name)

    # Creates the save directory. `exist_ok` replaces the separate existence
    # check, so there is no gap between checking and creating.
    os.makedirs(trad_dir, exist_ok=True)

    # Looks at the simulations
    for i in range(num_rounds):
        power_fp = os.path.join(power_dir, 'simulation_%i.p' % i)
        trad_fp = os.path.join(trad_dir, 'simulation_%i.p' % i)

        # NOTE: unpickling can execute arbitrary code. Only load simulation
        # files that were generated locally by the earlier notebooks.

        # Reuses a previously saved summary, unless a recalculation was
        # requested, and skips to the next round
        if os.path.exists(trad_fp) and not overwrite:
            with open(trad_fp, 'rb') as f_:
                trad_summary[test_name][i] = pickle.load(f_)
            continue

        # Loads the power data
        with open(power_fp, 'rb') as f_:
            power_sim = pickle.load(f_)

        # Draws the observations from the power simulation.
        # NOTE: this rebinds the notebook-level `counts` defined with the
        # run settings.
        power = power_sim['traditional_power']
        counts = power_sim['counts']

        eff_z, eff_t, eff_f = calc_power_summary(counts, power)

        # The effect size functions return one row per observation; the
        # traditional power is a single observation, so only the first row
        # is kept.
        round_summary = {
            'counts': counts,
            'index': np.arange(len(counts)),
            'power': power,
            # Simulation round repeated once per count
            'dummy': i * np.ones(len(counts)).astype(int),
            'f_effect': eff_f[0],
            't_effect': eff_t[0],
            'z_effect': eff_z[0],
            }
        trad_summary[test_name][i] = round_summary

        # Caches the summary for later runs
        with open(trad_fp, 'wb') as f_:
            pickle.dump(round_summary, f_)

Let's generate a summary dataframe, which will be easier to handle.

In [None]:
def draw_traditional_values(test_name):
    """Generates a distance matrix summary for the analyses"""
    test_data = pd.DataFrame(copy.copy(trad_summary[test_name])).transpose()
    test_sum = pd.DataFrame(
            data=[np.hstack(test_data['f_effect']),
                  np.hstack(test_data['t_effect']),
                  np.hstack(test_data['z_effect']),
                  np.hstack(test_data['power']),
                  np.hstack(test_data['dummy']),
                  np.hstack(test_data['counts']),
                  np.hstack(test_data['index']),
                 ],
            index=['f_effect', 't_effect', 'z_effect', 'power', 
                   'simulation', 'count', 'sim_position'],
            ).transpose()
    test_sum['test'] = test_name
    test_sum['index'] = (test_sum['test'] + 
                         test_sum['simulation'].apply(lambda x: ".%i" % x) + 
                         test_sum['count'].apply(lambda x: ".%02i".zfill(2) % x)
                         )
    test_sum.set_index('index', inplace=True)
    
    nans = test_sum.index[pd.isnull(test_sum).any(1)]
    
    test_sum.drop(nans, inplace=True)
    
    return test_sum, nans

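We'll combine all five tests to compare the three effect size estimates, with one row of plots per test and a pooled row ("all") at the top.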

In [None]:
# 6 x 6 grid: the first row shows the pooled data, the remaining rows are
# paired with the entries of `tests`, one row each
fig, axes = plt.subplots(6, 6)
fig.set_size_inches(12, 10)

# Per-test series collected for the pooled row
f_all, t_all, z_all, g_all, pwr_all = [], [], [], [], []

for test_name, ax_r in zip(tests, axes[1:]):
    ax1, ax2, ax3, ax4, ax5, ax6 = ax_r
    data, nans = draw_traditional_values(test_name)
    data.dropna(inplace=True)
    f_, t_, z_ = data['f_effect'], data['t_effect'], data['z_effect']
    g_, pwr = data['count'], data['power']

    for pooled, values in zip((f_all, t_all, z_all, g_all, pwr_all),
                              (f_, t_, z_, g_, pwr)):
        pooled.append(values)

    # Pairwise comparisons: f vs t, z vs t and f vs z
    for x_, y_, pair in ((f_, t_, [ax1, ax2]),
                         (z_, t_, [ax3, ax4]),
                         (f_, z_, [ax5, ax6])):
        plot.gradient_comparison(x_, y_, g_, pwr, pair, alpha=0.25, cmap='Spectral')

    # Only the first panel of the row is labeled, with the test name
    for ax_ in (ax1, ax3, ax5):
        ax_.set_xlabel('')
    ax1.set_ylabel(test_name)
    for ax_ in (ax3, ax5):
        ax_.set_ylabel('')

sn.despine()

# Pooled comparison across all tests in the top row
ax1, ax2, ax3, ax4, ax5, ax6 = axes[0]
f_all, t_all, z_all, g_all, pwr_all = [
    np.hstack(series) for series in (f_all, t_all, z_all, g_all, pwr_all)
]
for x_, y_, pair in ((f_all, t_all, [ax1, ax2]),
                     (z_all, t_all, [ax3, ax4]),
                     (f_all, z_all, [ax5, ax6])):
    plot.gradient_comparison(x_, y_, g_all, pwr_all, pair, alpha=0.1, cmap='Spectral')
ax1.set_ylabel('all')
l1 = ax1.set_xlabel('f vs t')
l2 = ax3.set_xlabel('z vs t')
l3 = ax5.set_xlabel('f vs z')

It looks like observations from the first run and values with power greater than about 0.95 do not behave well. Let's also assume that power less than 0.1 does not behave well either. So, we'll filter out the data from the lowest count and the power values outside those limits.

In [None]:
# 6 x 6 grid: the first row shows the pooled data, the remaining rows are
# paired with the entries of `tests`, one row each
fig, axes = plt.subplots(6, 6)
fig.set_size_inches(12, 10)

# Per-test series collected for the pooled row
f_all, t_all, z_all, g_all, pwr_all = [], [], [], [], []

for test_name, ax_r in zip(tests, axes[1:]):
    ax1, ax2, ax3, ax4, ax5, ax6 = ax_r
    data, nans = draw_traditional_values(test_name)
    data.dropna(inplace=True)

    # Keeps power strictly between 0.1 and 0.95 and drops the smallest
    # sample size (5)
    keep = (data['power'] < 0.95) & (data['power'] > 0.1) & (data['count'] > 5)
    data = data.loc[keep]
    f_, t_, z_ = data['f_effect'], data['t_effect'], data['z_effect']
    g_, pwr = data['count'], data['power']

    for pooled, values in zip((f_all, t_all, z_all, g_all, pwr_all),
                              (f_, t_, z_, g_, pwr)):
        pooled.append(values)

    # Pairwise comparisons: f vs t, z vs t and f vs z
    for x_, y_, pair in ((f_, t_, [ax1, ax2]),
                         (z_, t_, [ax3, ax4]),
                         (f_, z_, [ax5, ax6])):
        plot.gradient_comparison(x_, y_, g_, pwr, pair, alpha=0.25,
                                 cmap='Spectral', resid_y=[-0.1, 0.1])

    # Only the first panel of the row is labeled, with the test name
    for ax_ in (ax1, ax3, ax5):
        ax_.set_xlabel('')
    ax1.set_ylabel(test_name)
    for ax_ in (ax3, ax5):
        ax_.set_ylabel('')

sn.despine()

# Pooled comparison across all tests in the top row
ax1, ax2, ax3, ax4, ax5, ax6 = axes[0]
f_all, t_all, z_all, g_all, pwr_all = [
    np.hstack(series) for series in (f_all, t_all, z_all, g_all, pwr_all)
]
for x_, y_, pair in ((f_all, t_all, [ax1, ax2]),
                     (z_all, t_all, [ax3, ax4]),
                     (f_all, z_all, [ax5, ax6])):
    plot.gradient_comparison(x_, y_, g_all, pwr_all, pair, alpha=0.1,
                             cmap='Spectral', resid_y=[-0.1, 0.1])
ax1.set_ylabel('all')
l1 = ax1.set_title('f vs t')
l2 = ax3.set_title('z vs t')
l3 = ax5.set_title('f vs z')

# Displayed as the cell output: y limits of the second pooled panel
ax2.get_ylim()

If we exclude those values, it looks like we have, for the most part, a strong correlation between the effect sizes from the three methods ($R^{2} \geq 0.995$). However, we also find that there's a constant relationship between the T-test effect size and both the F and Z distribution effect sizes.

The Statsmodels effect is likely calculated as

$\begin{align}
\lambda &= \frac{\bar{x}_{1} - \bar{x}_{2}}{s\sqrt{\frac{1}{n} + \frac{1}{n}}}\\
&= \frac{\bar{x}_{1} - \bar{x}_{2}}{s\sqrt{2 / n}}\\
&= \sqrt{\frac{n}{2}}\left (\frac{\bar{x}_{1} - \bar{x}_{2}}{s} \right )
\end{align}$

while the z test and f test implementations do not include this term. So, we can try to correct the effect sizes using this adjustment.

In [None]:
# 6 x 6 grid: the first row shows the pooled data, the remaining rows are
# paired with the entries of `tests`, one row each
fig, axes = plt.subplots(6, 6)
fig.set_size_inches(12, 10)

# Per-test series collected for the pooled row
f_all, t_all, z_all, g_all, pwr_all = [], [], [], [], []

for test_name, ax_r in zip(tests, axes[1:]):
    ax1, ax2, ax3, ax4, ax5, ax6 = ax_r
    data, nans = draw_traditional_values(test_name)
    data.dropna(inplace=True)

    # Keeps power strictly between 0.1 and 0.95 and drops the smallest
    # sample size (5)
    keep = (data['power'] < 0.95) & (data['power'] > 0.1) & (data['count'] > 5)
    data = data.loc[keep]
    f_ = data['f_effect']
    # Rescales the t-based effect size by 1 / sqrt(2) before comparing it
    # with the f- and z-based effect sizes
    t_ = data['t_effect'] / np.sqrt(2)
    z_ = data['z_effect']
    g_, pwr = data['count'], data['power']

    for pooled, values in zip((f_all, t_all, z_all, g_all, pwr_all),
                              (f_, t_, z_, g_, pwr)):
        pooled.append(values)

    # Pairwise comparisons: f vs t, z vs t and f vs z
    for x_, y_, pair in ((f_, t_, [ax1, ax2]),
                         (z_, t_, [ax3, ax4]),
                         (f_, z_, [ax5, ax6])):
        plot.gradient_comparison(x_, y_, g_, pwr, pair, alpha=0.25,
                                 cmap='Spectral', resid_y=[-0.1, 0.1])

    # Only the first panel of the row is labeled, with the test name
    for ax_ in (ax1, ax3, ax5):
        ax_.set_xlabel('')
    ax1.set_ylabel(test_name)
    for ax_ in (ax3, ax5):
        ax_.set_ylabel('')

sn.despine()

# Pooled comparison across all tests in the top row
ax1, ax2, ax3, ax4, ax5, ax6 = axes[0]
f_all, t_all, z_all, g_all, pwr_all = [
    np.hstack(series) for series in (f_all, t_all, z_all, g_all, pwr_all)
]
for x_, y_, pair in ((f_all, t_all, [ax1, ax2]),
                     (z_all, t_all, [ax3, ax4]),
                     (f_all, z_all, [ax5, ax6])):
    plot.gradient_comparison(x_, y_, g_all, pwr_all, pair, alpha=0.1,
                             cmap='Spectral', resid_y=[-0.1, 0.1])
ax1.set_ylabel('all')
l1 = ax1.set_title('f vs t')
l2 = ax3.set_title('z vs t')
l3 = ax5.set_title('f vs z')

# Displayed as the cell output: y limits of the second pooled panel
ax2.get_ylim()

So, we see a slight difference in the shape of the curves; however, we see a high degree of correlation between the results.

In the next notebook, we'll look at the way these effect sizes behave with the empirically calculated effects and empirically calculated power.