# Hypothesis tests from confidence intervals

In [52]:
import pandas as pd
from scipy.stats import norm

from common import *

## Import confidence interval tables

In [5]:
met_ci_table = pd.read_csv(tables_path / 'met_flux_ci.csv')
hcys_ci_table = pd.read_csv(tables_path / 'hcys_flux_ci.csv')

In [11]:
# merge into a single dataframe
ci_table = pd.concat(
    [
        met_ci_table.assign(condition='met').set_index(['condition', 'cell_type', 'reaction']).sort_index(),
        hcys_ci_table.assign(condition='hcys').set_index(['condition', 'cell_type', 'reaction']).sort_index(),
    ]
)

In [14]:
ci_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,net,std err,ci_half-width,ci_lower,ci_upper
condition,cell_type,reaction,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
met,BJ-RAS,CYSTS,0.672216,0.111653,0.183652,0.488564,0.855869
met,BJ-RAS,HCYS_OUT,132.374727,5.488244,9.027358,123.347369,141.402085
met,BJ-RAS,METS,203.571238,61.183321,100.637607,102.933631,304.208846
met,BJ-RAS,MET_IN,641.592467,124.277554,204.418385,437.174082,846.010852
met,BJ-RAS,PROT_OUT,508.545524,124.15631,204.218957,304.326567,712.764481
met,BJ-RAS,SAM_METH,336.618181,60.932934,100.225758,236.392423,436.843939
met,BJ-TERT,CYSTS,3.918462,0.734904,1.20881,2.709652,5.127272
met,BJ-TERT,HCYS_OUT,149.877087,31.719494,52.173925,97.703161,202.051012
met,BJ-TERT,METS,26.037927,37.608795,61.860963,-35.823036,87.898891
met,BJ-TERT,MET_IN,569.383462,93.623437,153.99685,415.386611,723.380312


## Hypothesis tests

Assume net flux to be a normal distributed variable $X$ with true mean $\mu$. We have an observation $\bar{x}$ of the mean $\bar{X}$ with standard standard error $s = \sigma / n$ (assumed known). The variable $Z = (\bar{X} - \mu) / (\sigma/n)$ is then $N(0, 1)$. For testing the difference between two estimates, the statistic $(\bar{X}_1 - \bar{X}_2)/\sqrt(s_1^2 + s_2^2)$ is also $N(0, 1)$.

In [72]:
# compute a two-sided test p-value for a test table with conditions in rows
def normal_diff_test(test_table):
    means = []
    std_errs = []
    for index, row in test_table.iterrows():
        means.append(row['net'])
        std_errs.append(row['std err'])
    # two-sample z-statistic 
    z = (means[1] - means[0]) / np.sqrt(std_errs[0]**2 + std_errs[1]**2)
    # two-sided tail p-value
    return (1 - norm.cdf(abs(z))) * 2

Slice out values of interest from the CI table and perform tests

In [73]:
normal_diff_test(ci_table.xs(('BJ-TERT', 'METS'), level=('cell_type', 'reaction')))

0.0014978340293727577

In [74]:
normal_diff_test(ci_table.xs(('BJ-RAS', 'METS'), level=('cell_type', 'reaction')))

0.10453265110500909

In [75]:
normal_diff_test(ci_table.xs(('BJ-TERT', 'SAM_METH'), level=('cell_type', 'reaction')))

3.9968028886505635e-15

In [76]:
normal_diff_test(ci_table.xs(('BJ-RAS', 'SAM_METH'), level=('cell_type', 'reaction')))

9.924033963493173e-07