# Multiple Hypothesis Testing

In [8]:
from os.path import join

import pandas as pd
import numpy as np
from multipy.fwer import bonferroni
from multipy.fdr import lsu

from common.paths import PLS_RESULTS, RIDGE_RESULTS, CROSS_PRED_RESULTS

## Get data

### Manual Input

In [2]:
# Hand-entered p-values, six per group. The groups are concatenated below in
# the order bin_all, bin_1, bin_2, bin_3, which fixes the feature numbering.
bin_all = [0.0003, 0.0003, 0.0003, 0.0003, 0.0100, 0.0643]
bin_1 = [0.0953, 0.0313, 0.0593, 0.6485, 0.6048, 0.8287]
bin_2 = [0.0003, 0.0247, 0.0003, 0.0020, 0.0107, 0.4159]
bin_3 = [0.1789, 0.0653, 0.3196, 0.4172, 0.2779, 0.3162]
pvals = np.concatenate([bin_all, bin_1, bin_2, bin_3])

# Label every p-value "Feature 1" .. "Feature N" (1-based) so that the rows
# can be told apart once the correction columns are added.
feature_labels = ['Feature {}'.format(rank) for rank, _ in enumerate(pvals, start=1)]
df = pd.DataFrame({'Features': feature_labels, 'P-value': pvals})
display(df)

Unnamed: 0,Features,P-value
0,Feature 1,0.0003
1,Feature 2,0.0003
2,Feature 3,0.0003
3,Feature 4,0.0003
4,Feature 5,0.01
5,Feature 6,0.0643
6,Feature 7,0.0953
7,Feature 8,0.0313
8,Feature 9,0.0593
9,Feature 10,0.6485


### Within-Prediction

In [7]:
# Load the within-prediction result table; the first CSV column is used as
# the row index.
fn = 'pls_pts_adhd.csv'
results_path = join(PLS_RESULTS, fn)
df = pd.read_csv(filepath_or_buffer=results_path, index_col=0)

# Optional combined label column (currently disabled):
# df['Features'] = df['Target'] + df['Bin']

# The uncorrected p-values are what the correction section consumes.
pvals = df.loc[:, 'P-value']
display(df)

Unnamed: 0,Model,Population,Target,Bin,Score,P-value
0,pls,adhd,WISC_FSIQ,All,0.373846,0.000333
1,pls,adhd,WISC_FSIQ,Bin 1,0.255441,0.014662
2,pls,adhd,WISC_FSIQ,Bin 2,0.410544,0.000333
3,pls,adhd,WISC_FSIQ,Bin 3,0.076156,0.279573
4,pls,adhd,WISC_VSI,All,0.331722,0.000333
5,pls,adhd,WISC_VSI,Bin 1,0.235881,0.018994
6,pls,adhd,WISC_VSI,Bin 2,0.23864,0.007664
7,pls,adhd,WISC_VSI,Bin 3,0.143082,0.119627
8,pls,adhd,WISC_VCI,All,0.357835,0.000333
9,pls,adhd,WISC_VCI,Bin 1,0.200833,0.04132


### Cross-Prediction

In [16]:
# Pick the cross-prediction result file to correct. Swap in the commented
# alternative to process the diagnosis-based results instead:
# fn = 'ridge_pts_diagnosis_cross_prediction.csv'
fn = 'ridge_pts_age_cross_prediction.csv'
results_path = join(CROSS_PRED_RESULTS, fn)

# Read with the first CSV column as index, then immediately replace that
# index with a fresh 0..n-1 range (the stored index values are discarded).
df = pd.read_csv(results_path, index_col=0).reset_index(drop=True)

# The uncorrected p-values are what the correction section consumes.
pvals = df.loc[:, 'P-value']
display(df)

Unnamed: 0,Model,Target,Train,Test,Score,P-value,Population,Num Permutations,BFR,FDR
0,ridge,WISC_FSIQ,Bin 1,Bin 1,0.240653,0.023952,adhd,500,False,True
1,ridge,WISC_FSIQ,Bin 1,Bin 2,0.329501,0.001996,adhd,500,False,True
2,ridge,WISC_FSIQ,Bin 1,Bin 3,0.212915,0.007984,adhd,500,False,True
3,ridge,WISC_FSIQ,Bin 2,Bin 2,0.392312,0.001996,adhd,500,False,True
4,ridge,WISC_FSIQ,Bin 2,Bin 3,0.222423,0.005988,adhd,500,False,True
5,ridge,WISC_FSIQ,Bin 2,Bin 1,0.329306,0.001996,adhd,500,False,True
6,ridge,WISC_FSIQ,Bin 3,Bin 3,0.061523,0.307385,adhd,500,False,False
7,ridge,WISC_FSIQ,Bin 3,Bin 1,0.26997,0.001996,adhd,500,False,True
8,ridge,WISC_FSIQ,Bin 3,Bin 2,0.281861,0.001996,adhd,500,False,True
9,ridge,WISC_VSI,Bin 1,Bin 1,0.227449,0.023952,adhd,500,False,True


## Run correction

In [17]:
# Significance level shared by both corrections: passed as `alpha` to the
# Bonferroni routine and as `q` to the linear step-up (FDR) routine.
a_threshold = 0.05

# Hand the routines a plain NumPy array instead of the pandas Series. Any
# sort-and-index step performed on a Series is resolved by index *label*,
# which only coincides with position when the index is exactly 0..m-1; an
# ndarray is always indexed by position, so the result no longer depends on
# how the DataFrame happens to be indexed.
pvals_arr = np.asarray(pvals)

# Each call yields one flag per p-value, in the original row order, which is
# stored next to the p-values it refers to.
df['BFR'] = bonferroni(pvals_arr, alpha=a_threshold)
df['FDR'] = lsu(pvals_arr, q=a_threshold)

# Round only the displayed copy; `df` itself keeps full precision.
display(df.round(4))

Unnamed: 0,Model,Target,Train,Test,Score,P-value,Population,Num Permutations,BFR,FDR
0,ridge,WISC_FSIQ,Bin 1,Bin 1,0.2407,0.024,adhd,500,False,True
1,ridge,WISC_FSIQ,Bin 1,Bin 2,0.3295,0.002,adhd,500,False,True
2,ridge,WISC_FSIQ,Bin 1,Bin 3,0.2129,0.008,adhd,500,False,True
3,ridge,WISC_FSIQ,Bin 2,Bin 2,0.3923,0.002,adhd,500,False,True
4,ridge,WISC_FSIQ,Bin 2,Bin 3,0.2224,0.006,adhd,500,False,True
5,ridge,WISC_FSIQ,Bin 2,Bin 1,0.3293,0.002,adhd,500,False,True
6,ridge,WISC_FSIQ,Bin 3,Bin 3,0.0615,0.3074,adhd,500,False,False
7,ridge,WISC_FSIQ,Bin 3,Bin 1,0.27,0.002,adhd,500,False,True
8,ridge,WISC_FSIQ,Bin 3,Bin 2,0.2819,0.002,adhd,500,False,True
9,ridge,WISC_VSI,Bin 1,Bin 1,0.2274,0.024,adhd,500,False,True


In [18]:
# Diagnostic table: the sorted p-values (column 0) next to the step-up
# threshold each one is compared against (column 1), to see where the FDR
# cut-off falls.
m = len(pvals)

# Work on a plain ndarray so that indexing with `sort_ind` is positional and
# does not depend on the labels of the DataFrame index.
pvals_arr = np.asarray(pvals)
sort_ind = np.argsort(pvals_arr)

# The k-th smallest p-value (k = 1..m) is paired with k * q / m. The level is
# taken from `a_threshold` (set in the correction cell) rather than repeated
# as a literal, so this table cannot drift from the correction actually run.
qs = [rank * a_threshold / m for rank in range(1, m + 1)]

a = pd.DataFrame(list(zip(pvals_arr[sort_ind], qs)))
display(a)

Unnamed: 0,0,1
0,0.001996,0.000926
1,0.001996,0.001852
2,0.001996,0.002778
3,0.001996,0.003704
4,0.001996,0.00463
5,0.001996,0.005556
6,0.001996,0.006481
7,0.001996,0.007407
8,0.001996,0.008333
9,0.001996,0.009259


## Save and update results file

In [6]:
# Write `df` to `results_path`, i.e. over the CSV set by whichever load cell
# ran last.
# NOTE(review): the Manual Input cell does not set `results_path`; after
# running it, this path is stale or undefined -- confirm before saving.
df.to_csv(path_or_buf=results_path)