### Exploring Power And Sample Sizes

A brief foray into the realm of statistical robustness:

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as st

import warnings
warnings.filterwarnings("ignore")

In [2]:
def one_prop_sample_size(p0, p1, alpha, power):
    """
    Input:
    - p0: Historical/known proportion to compare to
    - p1: Desired proportion to test against
    - alpha: Desired p-value
    - power: Likelihood of avoiding type II error
    
    Output:
    - return: Sample size
    
    Notes:
    - Created using "Sample Size for One Sample, Dichotomous Outcome" taken from tinyurl.com/jyym9d9f
    """
    z_score_a = st.norm.ppf((1 - alpha)/2)
    z_score_b = st.norm.ppf(power)
    ES = (p1 - p0)/np.sqrt(p1*(1 - p1))
    return ((z_score_a + z_score_b)/ES)**2

In [3]:
one_prop_sample_size(0.6, 0.5, 0.05, 0.9)

37.13956540657675

### Using Power To Calculate Sample Sizes For BILS Data

In [4]:
bls = pd.read_csv('blsdata.csv')

In [5]:
full_bls = bls.dropna(how='all').iloc[:-1]

In [6]:
full_bls.isna().sum()

Occupations                    0
Count                          0
Women                          0
White                          0
Black or\nAfrican\nAmerican    0
Asian                          0
Hispanic\nor Latino            0
dtype: int64

In [7]:
full_bls['Count'] = full_bls['Count'].str.replace(',','').astype(int)
clean_bls = full_bls[full_bls['Count'] > 48]

In [8]:
clean_bls['Women'] = clean_bls['Women'].astype(float)
clean_bls['Black or\nAfrican\nAmerican'] = clean_bls['Black or\nAfrican\nAmerican'].astype(float)
clean_bls['Asian'] = clean_bls['Asian'].astype(float)
clean_bls['Hispanic\nor Latino'] = clean_bls['Hispanic\nor Latino'].astype(float)

In [9]:
lst = []
for prop in clean_bls['Women']:
    lst += [one_prop_sample_size(prop/100, 0.5, 0.05, 0.9)]

In [10]:
bias_w = clean_bls.reset_index()[['Occupations', 'Women']]

w_samples = bias_w.assign(Sample=pd.Series(lst))

In [11]:
w_samples.head()

Unnamed: 0,Occupations,Women,Sample
0,"Management, professional, and related occupat...",51.5,1650.647351
1,"Management, business, and financial operations...",44.8,137.350464
2,Management occupations,40.5,41.151873
3,Chief executives,29.2,8.584404
4,General and operations managers,35.4,17.423328
