# Effect Size & Power

### Imports

In [None]:
import numpy as np 
import pandas as pd
from matplotlib import pyplot as plt
from scipy import stats
from statsmodels.stats.power import TTestIndPower
    # statsmodels also has FTestPower and FTestAnovaPower classes!
import seaborn as sns

%matplotlib inline

[Here](https://www.physport.org/recommendations/Entry.cfm?ID=93385) is a helpful resource.

Examples of power tables:
- [full power table](http://www.pilesofvariance.com/Chapter13/Cohen_Power_Tables.pdf)

### Cohen's $d$: a standardized metric for effect size
Cohen's $d$ is one of the most common ways to measure effect size. It expresses the magnitude of the difference between two groups on a given variable in units of standard deviation; when there are more than two groups, it is computed pairwise. Larger absolute values indicate greater separation between the two groups on that variable.

$d$ = (difference of means) / (pooled standard deviation):

$d = \frac{\mu_1 - \mu_2}{\sigma_{pooled}}$, where $\sigma_{pooled} = \sqrt{\frac{(n_1 - 1)\sigma_1^2 + (n_2 - 1)\sigma_2^2}{n_1 + n_2 - 2}}$

* The larger the effect size, the greater the power of the test (for a fixed sample size and significance level).

In [None]:
def evaluate_PDF(rv, x=4):
    """
    Evaluate a random variable's density on a grid centred on its mean.

    Input:
        rv: a random variable object exposing mean(), std() and pdf()
        x:  how many standard deviations the grid extends on each side
            of the mean (4 by default)
    Output:
        (xs, ys): 100 evenly spaced points spanning mean +/- x*std, and
        the value of rv.pdf at each of those points
    """
    centre = rv.mean()
    half_width = x * rv.std()

    # Fixed 100-point grid, symmetric about the mean.
    grid = np.linspace(centre - half_width, centre + half_width, num=100)

    return grid, rv.pdf(grid)

In [None]:
def Cohen_d(group1, group2):
    """
    Return Cohen's d for two samples.

    group1, group2: Series or NumPy arrays of observations.

    The result is a floating point number: the difference between the
    two sample means (group1 minus group2) divided by the pooled
    standard deviation.
    """
    # Degrees of freedom of each sample; they weight the sample variances.
    dof1 = len(group1) - 1
    dof2 = len(group2) - 1

    # Pooled variance: dof-weighted average of the ddof=1 sample variances.
    weighted_var_sum = dof1 * group1.var(ddof=1) + dof2 * group2.var(ddof=1)
    pooled_sd = np.sqrt(weighted_var_sum / (dof1 + dof2))

    mean_gap = group1.mean() - group2.mean()
    return mean_gap / pooled_sd

In [None]:
# Seed NumPy's global random number generator so the draws below are
# reproducible from run to run.
np.random.seed(10)
# Alternative kept for reference: sample directly with NumPy instead of
# calling .rvs() (male_mean, male_sd, etc. are not defined in this cell).
# x1 = np.random.normal(male_mean, male_sd, 1000)
# x2 = np.random.normal(female_mean, female_sd, 1000)

# Draw 1000 values from each height distribution.
# NOTE(review): female_height and male_height are not created in this cell;
# presumably frozen scipy.stats distributions built earlier in the notebook
# -- confirm. If so, .rvs() consumes the global RNG seeded above, so the
# order of these two calls affects which samples come out.
female_sample = female_height.rvs(1000)
male_sample = male_height.rvs(1000)

# Effect size of the male/female difference: male mean minus female mean,
# in units of the pooled standard deviation, as computed by Cohen_d.
effect = Cohen_d(male_sample, female_sample)
print(effect)

#### Evaluating Effect Size

[An interactive visualization of Cohen's $d$](https://rpsychologist.com/d3/cohend/) is a good way to build intuition.

Conventional benchmarks for $d$:

Small effect = 0.2

Medium effect = 0.5

Large effect = 0.8

### Plotting Power Curves

In [None]:
# Plot power curves over sample sizes 10 through 99 for four effect sizes.
# NOTE(review): `test` is not created in this cell; presumably a
# TTestIndPower instance (imported at the top of the notebook) -- confirm.
# If so, dep_var='nobs' puts the number of observations on the x-axis and
# each entry of effect_size gets its own curve.
# The trailing semicolon keeps the notebook from echoing the return value.
test.plot_power(dep_var='nobs',
                         nobs=np.arange(10, 100),
                         effect_size=[0.2, 0.5, 0.8, 1.3]);