In [1]:
import numpy as np
import pandas as pd
import numba

In [2]:
@numba.njit
def draw_bs_sample(data):
    """
    Resample a 1D data set to produce one bootstrap sample.

    Entries are drawn from `data` with replacement (the default of
    np.random.choice) until the sample has as many entries as `data`.
    """
    n_points = len(data)
    resampled = np.random.choice(data, size=n_points)
    return resampled

@numba.njit
def draw_bs_reps_diff_mean(x, y, size=10000):
    """
    Bootstrap the difference of means between two 1D data sets.

    For each of the `size` replicates, `x` and `y` are resampled
    independently with draw_bs_sample and mean(x sample) - mean(y sample)
    is stored. The replicates are returned as an array of length `size`.
    """
    reps = np.empty(size)
    for k in range(size):
        # Resample x before y so the random stream is consumed in the
        # same order for every replicate.
        mean_x = np.mean(draw_bs_sample(x))
        mean_y = np.mean(draw_bs_sample(y))
        reps[k] = mean_x - mean_y
    return reps

In [3]:
# Excel workbook holding the viability counts.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
file_name = '/Volumes/James McGehee Mac/Documents/Stathopoulos_Lab/Leslie_sna_paper/viability_counts.xlsx'

# Only the first sheet of the workbook (index 0) is loaded here.
df_25 = pd.read_excel(file_name, sheet_name=0)

In [4]:
# Number of bootstrap replicates drawn for every genotype comparison.
N_BS_REPS_25 = 10000000

# Fixed seed so that re-running this cell reproduces the same p-values.
BS_SEED_25 = 42


@numba.njit
def _seed_numba_rng(seed):
    """
    Seed the random number generator used inside numba-compiled code.
    """
    # Numba keeps its own generator state: calling np.random.seed from
    # ordinary Python would not affect the jitted bootstrap functions.
    np.random.seed(seed)


genotypes_25 = df_25['genotype'].unique()

# The first genotype appearing in the sheet is the reference that every
# genotype is compared against.
# NOTE(review): this relies on row order in the spreadsheet — confirm the
# control genotype is always listed first.
is_yw_25 = df_25['genotype'] == genotypes_25[0]
total_yw_25 = df_25.loc[is_yw_25, 'total'].sum()
hatched_yw_25 = df_25.loc[is_yw_25, 'hatched'].sum()

# Expand the pooled counts into one 0/1 outcome per counted individual
# (1 = hatched, 0 = not hatched).
data_yw_25 = np.zeros([total_yw_25])
data_yw_25[0:hatched_yw_25] = 1

# The reference mean is the same for every comparison, so compute it once.
mean_yw_25 = np.mean(data_yw_25)

p_val_25 = np.zeros([len(genotypes_25)])

# Seed once, immediately before the replicates are drawn, so the whole
# cell is deterministic when re-run.
_seed_numba_rng(BS_SEED_25)

for i in range(len(genotypes_25)):
    # Build the row mask for this genotype once and reuse it for both columns.
    is_genotype_25 = df_25['genotype'] == genotypes_25[i]
    total_25 = df_25.loc[is_genotype_25, 'total'].sum()
    hatched_25 = df_25.loc[is_genotype_25, 'hatched'].sum()

    data_25 = np.zeros([total_25])
    data_25[0:hatched_25] = 1

    # Compute test statistic for original data set
    mean_25 = np.mean(data_25)
    diff_mean_25 = mean_yw_25 - mean_25

    # Shift both data sets onto the pooled mean so they share the same
    # mean before resampling.
    total_mean_25 = np.mean(np.concatenate((data_yw_25, data_25)))
    data_yw_25_shift = data_yw_25 - mean_yw_25 + total_mean_25
    data_25_shift = data_25 - mean_25 + total_mean_25

    # Generate bootstrap replicates of the difference of means
    bs_reps_25 = draw_bs_reps_diff_mean(data_yw_25_shift, data_25_shift, size=N_BS_REPS_25)

    # Two-sided p-value: fraction of replicates at least as extreme as the
    # observed difference. A value of 0 means no replicate was that extreme,
    # i.e. p < 1 / N_BS_REPS_25. The first iteration compares the reference
    # with itself (observed difference 0), so it always yields 1.
    p_val_25[i] = np.sum(np.abs(bs_reps_25) >= np.abs(diff_mean_25)) / len(bs_reps_25)

    print('For {0} the p-value = {1:.5e}'.format(genotypes_25[i], p_val_25[i]))

For YW the p-value = 1.00000e+00
For delprox1.3 the p-value = 3.00000e-07
For del3.0 the p-value = 0.00000e+00
For del4.4 the p-value = 0.00000e+00
For del2.6 dl l_h the p-value = 3.65100e-04
For deldist0.4 the p-value = 3.00000e-07
For del1.8 the p-value = 0.00000e+00
For del dist 2kb the p-value = 0.00000e+00
For del2.6 the p-value = 6.04860e-03
For delprox1.3_del dist 2kb the p-value = 0.00000e+00
