# Multiple Hypothesis Testing

In [2]:
from os.path import join

import pandas as pd
import numpy as np
from multipy.fwer import bonferroni
from multipy.fdr import lsu

from common.paths import PLS_RESULTS, RIDGE_RESULTS, CROSS_PRED_RESULTS

## Get data

### Manual Input

In [2]:
# Hand-entered p-values: four groups of six values each.
bin_all = [0.0003, 0.0003, 0.0003, 0.0003, 0.0100, 0.0643]
bin_1 = [0.0953, 0.0313, 0.0593, 0.6485, 0.6048, 0.8287]
bin_2 = [0.0003, 0.0247, 0.0003, 0.0020, 0.0107, 0.4159]
bin_3 = [0.1789, 0.0653, 0.3196, 0.4172, 0.2779, 0.3162]

# Flatten the groups into one vector, keeping the order all -> 1 -> 2 -> 3.
pvals = np.concatenate([bin_all, bin_1, bin_2, bin_3])

# Label every p-value "Feature 1" .. "Feature N" (1-based) and tabulate.
df = pd.DataFrame({
    'Features': ['Feature ' + str(k) for k in range(1, pvals.size + 1)],
    'P-value': pvals,
})
display(df)

Unnamed: 0,Features,P-value
0,Feature 1,0.0003
1,Feature 2,0.0003
2,Feature 3,0.0003
3,Feature 4,0.0003
4,Feature 5,0.01
5,Feature 6,0.0643
6,Feature 7,0.0953
7,Feature 8,0.0313
8,Feature 9,0.0593
9,Feature 10,0.6485


### Within-Prediction

In [7]:
# Read the PLS within-prediction results table; its first CSV column is the row index.
fn = 'pls_pts_adhd.csv'
results_path = join(PLS_RESULTS, fn)
df = pd.read_csv(filepath_or_buffer=results_path, index_col=0)

# NOTE: a combined label column (df['Target'] + df['Bin']) was drafted here
# but is intentionally left disabled.

# The correction step further down consumes the 'P-value' column via `pvals`.
pvals = df['P-value']
display(df)

Unnamed: 0,Model,Population,Target,Bin,Score,P-value
0,pls,adhd,WISC_FSIQ,All,0.373846,0.000333
1,pls,adhd,WISC_FSIQ,Bin 1,0.255441,0.014662
2,pls,adhd,WISC_FSIQ,Bin 2,0.410544,0.000333
3,pls,adhd,WISC_FSIQ,Bin 3,0.076156,0.279573
4,pls,adhd,WISC_VSI,All,0.331722,0.000333
5,pls,adhd,WISC_VSI,Bin 1,0.235881,0.018994
6,pls,adhd,WISC_VSI,Bin 2,0.23864,0.007664
7,pls,adhd,WISC_VSI,Bin 3,0.143082,0.119627
8,pls,adhd,WISC_VCI,All,0.357835,0.000333
9,pls,adhd,WISC_VCI,Bin 1,0.200833,0.04132


### Cross-Prediction

In [6]:
# Read the ridge cross-prediction results table; its first CSV column is the row index.
fn = 'ridge_pts_age_cross_prediction.csv'
results_path = join(CROSS_PRED_RESULTS, fn)
df = pd.read_csv(filepath_or_buffer=results_path, index_col=0)

# The correction step further down consumes the 'P-value' column via `pvals`.
pvals = df['P-value']
display(df)

Unnamed: 0,Model,Target,Num Permutations,Train Group,Test Group,Score,P-value
0,ridge,WISC_FSIQ,500,Bin 1,Bin 1,0.2453,0.022
1,ridge,WISC_FSIQ,500,Bin 1,Bin 2,0.3361,0.002
2,ridge,WISC_FSIQ,500,Bin 1,Bin 3,0.1995,0.008
3,ridge,WISC_FSIQ,500,Bin 2,Bin 2,0.3774,0.002
4,ridge,WISC_FSIQ,500,Bin 2,Bin 3,0.2402,0.004
5,ridge,WISC_FSIQ,500,Bin 2,Bin 1,0.3567,0.002
6,ridge,WISC_FSIQ,500,Bin 3,Bin 3,0.0589,0.3214
7,ridge,WISC_FSIQ,500,Bin 3,Bin 1,0.2149,0.006
8,ridge,WISC_FSIQ,500,Bin 3,Bin 2,0.3137,0.002
9,ridge,WISC_VSI,500,Bin 1,Bin 1,0.2309,0.024


## Run correction

In [7]:
# Apply both multiple-comparison procedures to the p-values of whichever
# "Get data" cell was run last, and attach the decisions to `df`.
a_threshold = 0.05  # level passed to both procedures (alpha for bonferroni, q for lsu)

# `pvals` is a pandas Series after the CSV cells but an ndarray after the
# manual cell. Convert to a plain float ndarray so the multipy routines always
# index positionally and never by DataFrame index label (label-based lookup
# would only be right while the index happens to be 0..n-1).
pvals_arr = np.asarray(pvals, dtype=float)

# Guard against hidden state: `pvals` and `df` must come from the same cell.
if len(pvals_arr) != len(df):
    raise ValueError(
        'pvals has {} entries but df has {} rows; re-run a single "Get data" cell'
        .format(len(pvals_arr), len(df))
    )

# One decision per row, in row order (rendered as True/False in the table).
df['BFR'] = bonferroni(pvals_arr, alpha=a_threshold)
df['FDR'] = lsu(pvals_arr, q=a_threshold)
display(df.round(4))

Unnamed: 0,Model,Target,Num Permutations,Train Group,Test Group,Score,P-value,BFR,FDR
0,ridge,WISC_FSIQ,500,Bin 1,Bin 1,0.2453,0.022,False,True
1,ridge,WISC_FSIQ,500,Bin 1,Bin 2,0.3361,0.002,False,True
2,ridge,WISC_FSIQ,500,Bin 1,Bin 3,0.1995,0.008,False,True
3,ridge,WISC_FSIQ,500,Bin 2,Bin 2,0.3774,0.002,False,True
4,ridge,WISC_FSIQ,500,Bin 2,Bin 3,0.2402,0.004,False,True
5,ridge,WISC_FSIQ,500,Bin 2,Bin 1,0.3567,0.002,False,True
6,ridge,WISC_FSIQ,500,Bin 3,Bin 3,0.0589,0.3214,False,False
7,ridge,WISC_FSIQ,500,Bin 3,Bin 1,0.2149,0.006,False,True
8,ridge,WISC_FSIQ,500,Bin 3,Bin 2,0.3137,0.002,False,True
9,ridge,WISC_VSI,500,Bin 1,Bin 1,0.2309,0.024,False,True


## Save and update results file

In [8]:
# Write the table (now including the BFR/FDR columns) back to the file it was
# read from. `results_path` is set only by the CSV-loading cells; the manual
# input cell does not define it, so check its value before running this.
df.to_csv(path_or_buf=results_path)