# Multiple Hypothesis Testing

In [1]:
from os.path import join

import pandas as pd
import numpy as np
from multipy.fwer import bonferroni
from multipy.fdr import lsu

from common.paths import PLS_RESULTS, RIDGE_RESULTS, CROSS_PRED_RESULTS

## Get data

### Manual Input

In [2]:
# Hand-typed p-values for a quick correction run without a results file.
# Alternative input (disabled): several per-bin lists concatenated into one array.
# bin_all = [0.0003, 0.0003, 0.0003, 0.0003, 0.0100, 0.0643]
# bin_1 = [0.0953, 0.0313, 0.0593, 0.6485, 0.6048, 0.8287]
# bin_2 = [0.0003, 0.0247, 0.0003, 0.0020, 0.0107, 0.4159]
# bin_3 = [0.1789, 0.0653, 0.3196, 0.4172, 0.2779, 0.3162]
# pvals = np.array(bin_all + bin_1 + bin_2 + bin_3)
pvals = np.array([0.3214, 0.0140, 0.0200, 0.5309, 0.1497, 0.4950])

# Pair every p-value with a 1-based placeholder label ('Feature 1', 'Feature 2', ...).
df = pd.DataFrame({
    'Features': ['Feature {}'.format(pos + 1) for pos in range(len(pvals))],
    'P-value': pvals,
})
display(df)

Unnamed: 0,Features,P-value
0,Feature 1,0.3214
1,Feature 2,0.014
2,Feature 3,0.02
3,Feature 4,0.5309
4,Feature 5,0.1497
5,Feature 6,0.495


### Within-Prediction

In [5]:
# Load the ridge within-prediction results table from RIDGE_RESULTS;
# the first CSV column is used as the DataFrame index.
fn = 'ridge_pts_adhd_two_bins.csv'
results_path = join(RIDGE_RESULTS, fn)
df = pd.read_csv(results_path, index_col=0)
# Disabled: would add a combined 'Features' label built from the 'Target' and 'Bin' columns.
# df['Features'] = df['Target'] + df['Bin']
# `pvals` and `df` feed the correction cell; `results_path` is reused by the save cell.
pvals = df['P-value']
display(df)

Unnamed: 0,Model,Population,Target,Bin,Alpha,Score,P-value
0,ridge,adhd,WISC_FSIQ,Bin 1,6401,0.32019,0.001996
1,ridge,adhd,WISC_FSIQ,Bin 2,9901,0.191331,0.025948
2,ridge,adhd,WISC_VSI,Bin 1,9901,0.31333,0.001996
3,ridge,adhd,WISC_VSI,Bin 2,9901,0.182058,0.031936
4,ridge,adhd,WISC_VCI,Bin 1,701,0.292524,0.001996
5,ridge,adhd,WISC_VCI,Bin 2,9901,0.219886,0.011976
6,ridge,adhd,WISC_FRI,Bin 1,9901,0.209537,0.00998
7,ridge,adhd,WISC_FRI,Bin 2,7301,0.186734,0.021956
8,ridge,adhd,WISC_WMI,Bin 1,1,0.100642,0.143713
9,ridge,adhd,WISC_WMI,Bin 2,1,0.150171,0.073852


### Cross-Prediction

In [8]:
# Cross-prediction results table. Alternative input file (disabled):
# fn = 'ridge_pts_age_cross_prediction.csv'
fn = 'ridge_pts_diagnosis_cross_prediction (n=93).csv'
results_path = join(CROSS_PRED_RESULTS, fn)

# Read with the first CSV column as the index, then drop that index in favour
# of a fresh 0..n-1 one so rows are numbered consecutively.
df = pd.read_csv(results_path, index_col=0).reset_index(drop=True)

# `pvals` and `df` feed the correction cell; `results_path` is reused by the save cell.
pvals = df['P-value']
display(df)

Unnamed: 0,Model,Target,Train,Test,Score,P-value,Population,Num Permutations
0,ridge,WISC_FSIQ,Healthy,Healthy,0.092055,0.225549,Healthy,500
1,ridge,WISC_FSIQ,Healthy,ADHD_One,0.18388,0.023952,Healthy,500
2,ridge,WISC_FSIQ,Healthy,ADHD_Two,0.356004,0.001996,Healthy,500
3,ridge,WISC_FSIQ,ADHD_One,ADHD_One,0.170265,0.123752,ADHD_One,500
4,ridge,WISC_FSIQ,ADHD_One,ADHD_Two,0.41055,0.001996,ADHD_One,500
5,ridge,WISC_FSIQ,ADHD_One,Healthy,0.138883,0.077844,ADHD_One,500
6,ridge,WISC_FSIQ,ADHD_Two,ADHD_Two,0.1198,0.179641,ADHD_Two,500
7,ridge,WISC_FSIQ,ADHD_Two,Healthy,0.329807,0.001996,ADHD_Two,500
8,ridge,WISC_FSIQ,ADHD_Two,ADHD_One,0.45997,0.001996,ADHD_Two,500
9,ridge,WISC_VSI,Healthy,Healthy,0.197298,0.081836,Healthy,500


## Run correction

In [6]:
# Level passed to the correction procedure(s) in this cell.
a_threshold = 0.05
# Disabled alternative: Bonferroni (FWER) correction at the same level.
# df['BFR'] = bonferroni(pvals, alpha=a_threshold)
# Store the output of the `lsu` FDR procedure as a new 'FDR' column (this mutates `df` in place).
df['FDR'] = lsu(pvals, q=a_threshold)
# Display a copy rounded to 4 decimals; `df` itself keeps the unrounded values.
display(df.round(4))

Unnamed: 0,Model,Population,Target,Bin,Alpha,Score,P-value,FDR
0,ridge,adhd,WISC_FSIQ,Bin 1,6401,0.3202,0.002,True
1,ridge,adhd,WISC_FSIQ,Bin 2,9901,0.1913,0.0259,True
2,ridge,adhd,WISC_VSI,Bin 1,9901,0.3133,0.002,True
3,ridge,adhd,WISC_VSI,Bin 2,9901,0.1821,0.0319,True
4,ridge,adhd,WISC_VCI,Bin 1,701,0.2925,0.002,True
5,ridge,adhd,WISC_VCI,Bin 2,9901,0.2199,0.012,True
6,ridge,adhd,WISC_FRI,Bin 1,9901,0.2095,0.01,True
7,ridge,adhd,WISC_FRI,Bin 2,7301,0.1867,0.022,True
8,ridge,adhd,WISC_WMI,Bin 1,1,0.1006,0.1437,False
9,ridge,adhd,WISC_WMI,Bin 2,1,0.1502,0.0739,False


## Save and update results file

In [10]:
# Write `df` (including the 'FDR' column if the correction cell has run) to `results_path`,
# i.e. back over the file it was loaded from.
# NOTE(review): `results_path` is only assigned by the Within-/Cross-Prediction cells; after the
# Manual Input cell it is undefined or left over from an earlier run — confirm which dataset
# is loaded before executing this cell.
df.to_csv(results_path)

## Get FDR q thresholds (critical value per sorted p-value)

In [18]:
# Tabulate each p-value (ascending) next to its rank-wise critical value k * q / m,
# where k is the 1-based rank and m the number of tests.

# Work on a plain NumPy array: np.argsort yields *positions*, whereas indexing a
# pandas Series with integers is label-based and only coincides with positions
# when the index happens to be exactly 0..n-1.
pvals_arr = np.asarray(pvals, dtype=float)
m = len(pvals_arr)
sort_ind = np.argsort(pvals_arr)

# Reuse the level set in the correction cell rather than a second hard-coded 0.05,
# so this table always matches the correction that was actually run.
qs = np.arange(1, m + 1) * a_threshold / m

# Column 0: sorted p-values; column 1: the matching critical values.
a = pd.DataFrame(np.column_stack((pvals_arr[sort_ind], qs)))
display(a)

Unnamed: 0,0,1
0,0.001996,0.000926
1,0.001996,0.001852
2,0.001996,0.002778
3,0.001996,0.003704
4,0.001996,0.00463
5,0.001996,0.005556
6,0.001996,0.006481
7,0.001996,0.007407
8,0.001996,0.008333
9,0.001996,0.009259
