# JuSpyce API test: group permutation test

In [1]:
import sys
import os
from glob import glob
import pathlib
import numpy as np
import pandas as pd
from IPython.display import display
import seaborn as sns
import matplotlib.pyplot as plt

# project root = parent directory of the current working directory
wd = pathlib.Path().resolve().parent
print(wd)

# put the project root on the module search path so the local juspyce package can be imported
sys.path.append(str(wd))
from juspyce.api import JuSpyce
from juspyce.stats import *
from juspyce.utils import *

/Users/llotter/projects/juspyce


## Load JuSpyce data from test_juspyce.fit.ipynb

In [2]:
# gzipped pickle of the JuSpyce object located in the project's "testing" folder
pickle_path = os.path.join(wd, "testing", "test_juspyce_vol.pkl.gz")
juspyce_vol = JuSpyce.from_pickle(pickle_path)

INFO:juspyce.api:Loaded complete object from /Users/llotter/projects/juspyce/testing/test_juspyce_vol.pkl.gz.


## Permutation of group assignment to compare predictions between groups

This is based on the idea of the original [JuSpace](https://github.com/juryxy/JuSpace) toolbox. If group differences in a certain imaging modality have biological meaning, they may align with the distribution of a certain neurotransmitter (e.g., see in the JuSpace paper: the difference in rsfMRI activity between patients with Parkinson's and healthy controls aligns with dopaminergic transmitter maps).
Two groups are defined via a vector with the length of the "Y" dataframe. The difference between two groups (mean difference, Cohen's d, every vector in group A - mean of group B, ...) is calculated (`JuSpyce.transform()`) and a prediction function (`JuSpyce.predict()`) is applied. The group labels are permuted `n_perm` times and the transform and prediction process is repeated to generate null distributions of "prediction values" ($R^2$, correlation coefficients, ...). From these, p values are calculated.  
In many cases, these null distributions are based on the means of the predicted values rather than the individual predicted values. This makes sense as one would often ask questions such as, for example, "Does the *average* deviation of brain activity in an individual relative to a reference cohort relate to a certain predictor?". This behavior can be modified via `JuSpyce.permute_groups(p_from_average_y=True)` with `True`, `mean`, or `median` leading to calculation of a p value for the group-wise mean/median of prediction values (True -> median) and `False` forcing individual p values. 

### Grouping variable

In [3]:
# number of rows in the Y dataframe; the group vector must have exactly this length
n_Y = juspyce_vol.Y.shape[0]
# first half -> group 0, remainder -> group 1.
# Assigning the remainder (instead of a second floor(n_Y / 2)) keeps
# len(groups) == n_Y even when n_Y is odd.
n_group0 = n_Y // 2
groups = [0] * n_group0 + [1] * (n_Y - n_group0)
print(n_Y, groups)

28 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


### Does the mean difference between two groups correlate with a certain transmitter?

mean difference of two groups -> one vector 

In [4]:
# Permutation test on the group labels: compare + predict is repeated for
# n_perm shuffled label vectors to obtain null distributions.
# NOTE(review): seed=None presumably leaves the permutation unseeded, so the
# resulting p values may differ between runs -- consider fixing a seed.
p_data, true_predictions, null_predictions = juspyce_vol.permute_groups(
    groups=groups,
    comparison="diff(mean(A),mean(B))",
    method="spearman",
    n_perm=1000,
    p_tail="two",
    r_to_z=True,
    adjust_r2=True,
    mlr_individual=True,
    n_proc=8,
    n_proc_predict=1,
    seed=None,
    verbose=True,
    store=True,
)

INFO:juspyce.api:Running 'true' group comparison and prediction (comparison = 'diff(mean(A),mean(B))', method = 'spearman', using mean of predictions).
INFO:juspyce.api:Subtracting parcelwise mean of B from mean of A: new Y = mean(Y[A]) - mean(Y[B]).


Predicting (spearman, 1 proc):   0%|          | 0/1 [00:00<?, ?it/s]

INFO:juspyce.api:Running null group comparisons and predictions (comparison = 'diff(mean(A),mean(B))', method = 'spearman', using mean of predictions).


Null comparisons (spearman, 8 proc):   0%|          | 0/1000 [00:00<?, ?it/s]

INFO:juspyce.api:Calculating exact p-values (tails = 'two').


In [5]:
# look at the result: keys of the dict holding the group-comparison p-value tables
juspyce_vol.p_comparisons.keys()

dict_keys(['diff(mean(A),mean(B))-spearman'])

In [6]:
# p-value table stored for the mean-difference comparison with the spearman method
juspyce_vol.p_comparisons["diff(mean(A),mean(B))-spearman"]

Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
"mean-diff(mean(A),mean(B))",0.304,0.736,0.796,0.598,0.228,0.684,0.786


### Can the effect size between two groups be predicted from a certain transmitter?

In [7]:
# step 1 -- compare: parcelwise effect size (Cohen's d) between groups A and B
juspyce_vol.compare(groups=groups, comparison="cohen(A,B)")

# step 2 -- predict: dominance analysis on the effect-size map
juspyce_vol.predict(comparison="cohen(A,B)", method="dominance")

# step 3 -- permute: repeat compare + predict for shuffled group labels
p_data, true_predictions, null_predictions = juspyce_vol.permute_groups(
    groups=groups,
    comparison="cohen(A,B)",
    method="dominance",
    n_perm=1000,
    p_tail="two",
    r_to_z=True,
    adjust_r2=True,
    mlr_individual=True,
    n_proc=8,
    n_proc_predict=1,
    seed=None,
    verbose=True,
    store=True,
)

INFO:juspyce.api:Calculating parcelwise effect size between A and B (cohen, paired: False).


  0%|          | 0/116 [00:00<?, ?it/s]

Predicting (dominance, 8 proc):   0%|          | 0/1 [00:00<?, ?it/s]

INFO:juspyce.api:Running 'true' group comparison and prediction (comparison = 'cohen(A,B)', method = 'dominance', using mean of predictions).
INFO:juspyce.api:Calculating parcelwise effect size between A and B (cohen, paired: False).


  0%|          | 0/116 [00:00<?, ?it/s]

Predicting (dominance, 1 proc):   0%|          | 0/1 [00:00<?, ?it/s]

INFO:juspyce.api:Running null group comparisons and predictions (comparison = 'cohen(A,B)', method = 'dominance', using mean of predictions).


Null comparisons (dominance, 8 proc):   0%|          | 0/1000 [00:00<?, ?it/s]

INFO:juspyce.api:Calculating exact p-values (tails = 'two').


In [8]:
# look at the result: the dominance run adds one p-value table per dominance statistic
juspyce_vol.p_comparisons.keys()

dict_keys(['diff(mean(A),mean(B))-spearman', 'cohen(A,B)-dominance_total', 'cohen(A,B)-dominance_individual', 'cohen(A,B)-dominance_relative', 'cohen(A,B)-dominance_full_r2'])

In [9]:
# show, in order: the compared Y data, the total dominance values, and their permutation p values
display(
    juspyce_vol.comparisons["cohen(A,B)"],
    juspyce_vol.predictions["cohen(A,B)-dominance_total"],
    juspyce_vol.p_comparisons["cohen(A,B)-dominance_total"],
)

Unnamed: 0,LH_Vis_1,LH_Vis_2,LH_Vis_3,LH_Vis_4,LH_Vis_5,LH_Vis_6,LH_Vis_7,LH_Vis_8,LH_Vis_9,LH_SomMot_1,...,PUT-rh,CAU-rh,HIP-lh,AMY-lh,pTHA-lh,aTHA-lh,NAc-lh,GP-lh,PUT-lh,CAU-lh
"cohen(A,B)",0.276661,-0.517828,-0.696612,-0.325058,-0.60921,-0.590054,-0.575566,-0.433675,-0.551265,0.381074,...,0.231055,0.146563,0.221554,-0.316784,0.543487,0.423675,0.000933,0.213862,0.331783,0.157934


Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
"cohen(A,B)",0.038337,0.000977,0.023584,0.021675,0.082861,0.005747,0.018984


Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
"mean-cohen(A,B)",0.828,0.25,0.924,0.76,0.138,0.87,0.774


### Can the individual difference between the individuals of one group relative to the other group ("reference") be predicted from a certain transmitter?

In [10]:
# step 1 -- compare: parcelwise z scores of A relative to B
# (alternative comparison: diff(A,mean(B)))
juspyce_vol.compare(groups=groups, comparison="z(A,B)")

# step 2 -- predict: "slr" method applied to every z-scored Y vector
juspyce_vol.predict(comparison="z(A,B)", method="slr")

# step 3 -- permute: repeat compare + predict for shuffled group labels
p_data, true_predictions, null_predictions = juspyce_vol.permute_groups(
    groups=groups,
    comparison="z(A,B)",
    method="slr",
    n_perm=1000,
    p_tail="two",
    r_to_z=True,
    adjust_r2=True,
    mlr_individual=True,
    n_proc=8,
    n_proc_predict=1,
    seed=None,
    verbose=True,
    store=True,
)

INFO:juspyce.api:Calculating parcelwise z scores for A relative to B: new Y = (Y[A] - mean(Y[B])) / std(Y[B]).


Predicting (slr, 8 proc):   0%|          | 0/14 [00:00<?, ?it/s]

INFO:juspyce.api:Running 'true' group comparison and prediction (comparison = 'z(A,B)', method = 'slr', using mean of predictions).
INFO:juspyce.api:Calculating parcelwise z scores for A relative to B: new Y = (Y[A] - mean(Y[B])) / std(Y[B]).


Predicting (slr, 1 proc):   0%|          | 0/14 [00:00<?, ?it/s]

INFO:juspyce.api:Running null group comparisons and predictions (comparison = 'z(A,B)', method = 'slr', using mean of predictions).


Null comparisons (slr, 8 proc):   0%|          | 0/1000 [00:00<?, ?it/s]

INFO:juspyce.api:Calculating exact p-values (tails = 'two').


In [11]:
# show, in order: the z-scored Y data, the slr prediction values, and their permutation p values
display(
    juspyce_vol.comparisons["z(A,B)"],
    juspyce_vol.predictions["z(A,B)-slr"],
    juspyce_vol.p_comparisons["z(A,B)-slr"],
)

Unnamed: 0,LH_Vis_1,LH_Vis_2,LH_Vis_3,LH_Vis_4,LH_Vis_5,LH_Vis_6,LH_Vis_7,LH_Vis_8,LH_Vis_9,LH_SomMot_1,...,PUT-rh,CAU-rh,HIP-lh,AMY-lh,pTHA-lh,aTHA-lh,NAc-lh,GP-lh,PUT-lh,CAU-lh
control,-1.307879,-0.5946,-1.141572,-0.62887,-0.825993,-1.152819,-1.020215,-0.363459,-0.705579,-0.868137,...,0.199933,0.003804,-0.991716,-0.486386,2.355779,0.727542,-0.693365,0.287516,0.728857,-0.541071
touch,-0.613116,-0.789268,-0.691663,-0.600602,-0.635929,-0.838796,-0.701211,-0.746437,-1.059448,1.398766,...,-0.288611,-0.591191,-0.654405,-0.340006,0.419474,-0.17874,-0.362638,-0.757782,-0.603605,-0.566459
interoception,-0.565262,-0.533565,-0.691961,-0.51,-0.609814,-0.291691,-0.700389,-0.910814,-1.133115,1.07566,...,2.109133,0.376119,-0.19314,0.273506,-0.084488,-0.044081,-0.013891,-0.03862,0.423799,0.087311
learning,4.066362,-0.050946,0.088384,0.04452,-0.252872,0.029881,-0.408359,-0.086602,-0.356469,-1.045881,...,1.121934,1.796205,4.657692,0.116224,0.316764,1.129707,1.698001,2.003917,2.289148,2.231389
attention,-1.427004,0.523679,-0.011685,0.77442,0.524566,0.335434,0.616278,1.236984,0.799963,-0.589778,...,-0.692167,-0.723843,-1.684373,-0.897631,-0.96408,-0.792103,-0.799853,-0.942099,-1.109166,-0.802253
language,-0.146665,-0.459491,-0.937656,-0.060815,-0.775598,-0.535819,0.104261,-0.780245,-0.943557,0.781672,...,-0.431219,0.014725,-0.719105,-0.439441,-1.066447,0.218143,-0.067029,-0.057223,-0.237561,0.183753
interaction,1.588684,0.411707,-0.020956,0.608337,0.399552,-0.158962,0.096522,0.269134,-0.397154,2.33272,...,-0.09907,-0.410343,1.790933,-0.388418,0.502116,-0.730769,-0.540218,0.018641,0.15427,-0.942372
inhibition,-1.776383,-0.934862,-1.082668,-0.60609,-0.875604,-1.297595,-0.950768,-0.741533,-1.133768,-0.144606,...,0.155098,0.117653,-1.612033,-0.619051,0.89303,-0.218816,-0.561609,0.288745,0.712089,-0.448295
somatosensory,-0.63471,-0.844808,-0.712754,-0.607491,-0.666965,-0.78864,-0.751607,-0.80985,-1.116823,1.440507,...,-0.310711,-0.746466,-0.688968,-0.358052,0.021869,-0.405056,-0.33133,-0.837018,-0.614067,-0.564958
decision,-1.205405,-0.120479,-0.977907,-0.022699,-0.568839,-1.039217,0.389823,-0.314305,-0.858288,-1.368766,...,0.341518,1.978846,-1.570568,-0.500041,-0.669562,1.191916,2.084474,2.056355,1.831413,2.460268


Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
control,0.125241,0.002638,0.006781,-0.00571,0.129969,0.011245,-0.008132
touch,0.015636,0.034241,0.016508,-0.007284,0.000147,-0.002892,0.013234
interoception,-0.006167,0.097305,0.120509,0.128111,-0.001383,0.010386,0.036733
learning,0.181452,0.12879,0.140292,0.015167,0.116469,0.023274,0.187581
attention,0.01828,0.12935,0.00114,0.117117,0.010131,0.039305,0.044785
language,0.08449,0.003728,0.013689,-0.00532,0.001832,0.022813,-0.003438
interaction,0.010914,-0.003194,0.002423,0.060424,-0.008647,-0.003723,-0.008387
inhibition,0.063814,0.0144,-0.008047,-0.006604,0.060887,0.045397,-0.008019
somatosensory,0.004329,0.046277,0.031009,-0.006691,-0.005629,-0.006262,0.017461
decision,-0.003941,-0.00539,-0.007605,0.13321,0.063178,0.178575,0.126534


Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
"mean-z(A,B)",0.68,0.634,0.818,0.036,0.71,0.096,0.368


## Correct p-values

P values can be corrected across dataframes or across rows/columns of dataframes using `JuSpyce.correct_p()`. Unless specified otherwise, the method loops over all p-value dataframes and applies multiple comparison correction methods from `statsmodels.stats.multitest.multipletests`.

Results will be stored in the `JuSpyce.p_comparisons` dict as `JuSpyce.p_comparisons["comparison_name-prediction_name--correction_method"]`, e.g., if the comparison is `cohen(A,B)`, the method is `spearman`, and the correction is `fdr_bh`: `juspyce_vol.p_comparisons["cohen(A,B)-spearman--fdr_bh"]`

In [12]:
# multiple-comparison correction of the stored group-comparison p-value tables
juspyce_vol.correct_p(
    analysis="comparisons",  # one of "predictions" or "comparisons" -> here: comparisons
    method="all",  # "all": iterate over all dataframes, correcting each dataframe on its own
    mc_method="fdr_bh",  # correction method passed on to statsmodels
    mc_alpha=0.05,  # alpha threshold, should have no effect
    mc_dimension="array",  # 'array', 'row' or 'column'
)

# show every stored table (uncorrected and corrected), each preceded by its key
for key, p_table in juspyce_vol.p_comparisons.items():
    display(key)
    display(p_table)

'diff(mean(A),mean(B))-spearman'

Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
"mean-diff(mean(A),mean(B))",0.304,0.736,0.796,0.598,0.228,0.684,0.786


'cohen(A,B)-dominance_total'

Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
"mean-cohen(A,B)",0.828,0.25,0.924,0.76,0.138,0.87,0.774


'cohen(A,B)-dominance_individual'

Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
"mean-cohen(A,B)",0.544,0.694,0.016,0.982,0.294,0.432,0.788


'cohen(A,B)-dominance_relative'

Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
"mean-cohen(A,B)",0.64,0.256,0.908,0.864,0.008,0.952,0.998


'cohen(A,B)-dominance_full_r2'

Unnamed: 0,dominance_full_r2
"mean-cohen(A,B)",0.504


'z(A,B)-slr'

Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
"mean-z(A,B)",0.68,0.634,0.818,0.036,0.71,0.096,0.368


'cohen(A,B)-dominance_relative--fdr_bh'

Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
"mean-cohen(A,B)",0.998,0.896,0.998,0.998,0.056,0.998,0.998


'diff(mean(A),mean(B))-spearman--fdr_bh'

Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
"mean-diff(mean(A),mean(B))",0.796,0.796,0.796,0.796,0.796,0.796,0.796


'cohen(A,B)-dominance_individual--fdr_bh'

Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
"mean-cohen(A,B)",0.919333,0.919333,0.112,0.982,0.919333,0.919333,0.919333


'z(A,B)-slr--fdr_bh'

Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
"mean-z(A,B)",0.818,0.818,0.818,0.252,0.818,0.336,0.818


'cohen(A,B)-dominance_full_r2--fdr_bh'

Unnamed: 0,dominance_full_r2
"mean-cohen(A,B)",0.504


'cohen(A,B)-dominance_total--fdr_bh'

Unnamed: 0,5HT2a-cimbi36-29-beliveau2017,NMDA-ge179-29-galovic2021,mGluR5-abp688-73-smart2019,MU-carfentanil-204-kantonen2020,GABAa-flumazenil-6-dukart2018,5HT1b-p943-65-gallezot2010,D2-raclopride-156-malen2022
"mean-cohen(A,B)",0.924,0.875,0.924,0.924,0.875,0.924,0.924
