# JuSpyce API test: data transformations

In [1]:
import sys
import os
from glob import glob
import pathlib
import numpy as np
import pandas as pd
from IPython.display import display
import seaborn as sns

# Project root: the parent of the directory this notebook is executed from.
wd = pathlib.Path().resolve().parent
print(wd)

# Make the local juspyce package importable.
# dirname(join(wd, "juspyce")) is simply wd, so add wd directly; the membership
# check keeps sys.path free of duplicates when this cell is re-run.
if str(wd) not in sys.path:
    sys.path.append(str(wd))
from juspyce.api import JuSpyce
from juspyce.stats import *
from juspyce.utils import *

/Users/llotter/projects/juspyce


## Load JuSpyce data from test_juspyce.fit.ipynb

In [2]:
# Pickled JuSpyce object (see the section heading: produced by the fit test notebook).
# NOTE(review): pickle files can run arbitrary code on load - only load trusted files.
pkl_path = os.path.join(wd, "testing", "test_juspyce_vol.pkl.gz")
juspyce_vol = JuSpyce.from_pickle(pkl_path)

INFO:juspyce.api:Loaded complete object from /Users/llotter/projects/juspyce/testing/test_juspyce_vol.pkl.gz.


## Transforms

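Methods are implemented to transform the imported dataframes.
The `JuSpyce.transform` function has an argument named `store`. If `store` is False, the transform is applied and the resulting dataframes are only returned. If `store` is True, the resulting dataframes are additionally stored in the object (e.g., `juspyce_vol.transforms["X-mean"]`), overwriting a previously stored transform of the same name.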

The results are in part compared to other functions to check for errors.

### Parcel-wise mean

In [3]:
# Parcel-wise mean of the X data; store=True also keeps the result on the
# object, where it is read back from transforms["X-mean"] for display.
data_tranformed = juspyce_vol.transform(transform="mean", dataset="X", store=True)
x_mean = juspyce_vol.transforms["X-mean"]
display(x_mean)

INFO:juspyce.api:Calculating parcelwise mean of X.


Unnamed: 0,LH_Vis_1,LH_Vis_2,LH_Vis_3,LH_Vis_4,LH_Vis_5,LH_Vis_6,LH_Vis_7,LH_Vis_8,LH_Vis_9,LH_SomMot_1,...,PUT-rh,CAU-rh,HIP-lh,AMY-lh,pTHA-lh,aTHA-lh,NAc-lh,GP-lh,PUT-lh,CAU-lh
mean,-0.517305,-0.169558,0.014598,-1.304095,-0.144706,0.440864,-0.054003,-0.582674,-0.094969,0.184419,...,0.729475,-0.197622,-0.473498,-0.313305,-0.482039,-0.388886,1.343536,-0.192195,0.87472,-0.23597


### Partial out 'Z' data from 'X' or 'Y' data

In [4]:
# Regress the Z data out of the X data; the stored result holds the residuals.
data_tranformed = juspyce_vol.transform(
    transform="partial",
    dataset="X",
    store=True)
display(juspyce_vol.transforms["X-partial"])

# Independent re-computation, one X row at a time, via check_residuals
# (presumably the residuals of regressing y on x - confirm against juspyce).
print("check")
check_res = np.zeros(juspyce_vol.X.shape)
for i_row in range(juspyce_vol.X.shape[0]):
    check_res[i_row, :] = check_residuals(
        y=juspyce_vol.X.iloc[i_row, :].values.T,
        x=juspyce_vol.Z.values.T)
display(pd.DataFrame(check_res))

# Make the comparison programmatic instead of visual. The tolerance allows for
# the small rounding differences visible between the two result tables.
np.testing.assert_allclose(
    np.asarray(juspyce_vol.transforms["X-partial"], dtype=float),
    check_res,
    rtol=1e-4,
    atol=1e-4,
    err_msg="'partial' transform differs from check_residuals result")

INFO:juspyce.api:Regressing 'Z' from 'X': new X = residuals.


  0%|          | 0/7 [00:00<?, ?it/s]

Unnamed: 0,LH_Vis_1,LH_Vis_2,LH_Vis_3,LH_Vis_4,LH_Vis_5,LH_Vis_6,LH_Vis_7,LH_Vis_8,LH_Vis_9,LH_SomMot_1,...,PUT-rh,CAU-rh,HIP-lh,AMY-lh,pTHA-lh,aTHA-lh,NAc-lh,GP-lh,PUT-lh,CAU-lh
5HT2a-cimbi36-29-beliveau2017,-0.385752,0.596669,0.422578,0.163614,1.101903,0.73312,1.023558,0.243687,0.162138,1.154529,...,-1.789098,-3.255152,-1.027084,-1.645859,-2.136799,-1.464116,-1.351405,-1.874458,-1.285802,-3.297319
NMDA-ge179-29-galovic2021,-0.962421,0.41055,0.553025,-1.762981,-0.026496,0.594174,-0.57212,-0.792777,0.395955,0.543546,...,2.244207,0.689294,0.988232,-0.959849,2.832173,2.317987,0.621775,2.745115,2.639071,0.795673
mGluR5-abp688-73-smart2019,-0.912333,-1.281395,-0.874596,-1.978698,-1.25298,-0.197213,0.254904,-0.596534,0.417433,1.066034,...,0.425561,-0.574416,-1.459815,-1.959733,-1.823035,-2.442607,0.333906,-1.98089,0.333049,-0.792469
MU-carfentanil-204-kantonen2020,-0.932585,-1.391632,-1.497899,-0.977302,-1.446294,-1.738807,-0.407909,-0.867972,-1.131666,-0.103788,...,1.426441,1.120911,-1.689079,0.492283,1.507036,2.432395,2.643303,1.446537,1.646829,0.785053
GABAa-flumazenil-6-dukart2018,0.368881,1.141642,1.356244,-0.765128,1.088604,1.881065,0.2505,-0.047438,0.892375,0.539379,...,-1.993962,-3.250922,-0.791774,-1.325229,-2.457878,-2.589603,-1.681919,-1.690795,-1.73723,-3.293755
5HT1b-p943-65-gallezot2010,-1.68175,-0.701609,-0.487479,-1.168028,0.872137,1.74397,-0.217787,-0.327244,-0.057031,-0.574288,...,0.310244,-1.16874,-1.460621,0.241254,-1.731632,-1.611388,2.273883,2.340058,0.212253,-1.033744
D2-raclopride-156-malen2022,-0.392841,-0.205691,-0.337036,-0.367779,-0.127326,-0.290396,-0.237938,-0.245386,-0.04989,-0.107126,...,4.587807,3.524098,-0.416684,-0.289626,0.05933,0.105539,3.309025,2.613692,4.800092,2.998298


check


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,106,107,108,109,110,111,112,113,114,115
0,-0.385752,0.596669,0.422578,0.163614,1.101903,0.73312,1.023558,0.243687,0.162138,1.154529,...,-1.789098,-3.255152,-1.027084,-1.645859,-2.136799,-1.464116,-1.351405,-1.874458,-1.285802,-3.297319
1,-0.962421,0.41055,0.553025,-1.762981,-0.026496,0.594174,-0.57212,-0.792777,0.395955,0.543546,...,2.244207,0.689294,0.988232,-0.959849,2.832173,2.317987,0.621775,2.745115,2.639071,0.795673
2,-0.912333,-1.281395,-0.874596,-1.978698,-1.252979,-0.197213,0.254904,-0.596534,0.417433,1.066034,...,0.425561,-0.574416,-1.459815,-1.959733,-1.823035,-2.442607,0.333906,-1.98089,0.333049,-0.792469
3,-0.932585,-1.391632,-1.497899,-0.977302,-1.446294,-1.738807,-0.407909,-0.867972,-1.131666,-0.103788,...,1.426441,1.120911,-1.689079,0.492283,1.507036,2.432395,2.643303,1.446537,1.646829,0.785053
4,0.368881,1.141642,1.356244,-0.765128,1.088604,1.881065,0.2505,-0.047438,0.892375,0.539379,...,-1.993962,-3.250922,-0.791774,-1.325229,-2.457878,-2.589603,-1.681919,-1.690795,-1.73723,-3.293755
5,-1.68175,-0.701609,-0.487479,-1.168028,0.872137,1.74397,-0.217787,-0.327244,-0.057031,-0.574288,...,0.310244,-1.16874,-1.460621,0.241254,-1.731632,-1.611388,2.273883,2.340058,0.212253,-1.033744
6,-0.392841,-0.205691,-0.337036,-0.367779,-0.127326,-0.290396,-0.237938,-0.245386,-0.04989,-0.107126,...,4.587807,3.524098,-0.416684,-0.289626,0.05933,0.105539,3.309025,2.613692,4.800092,2.998298


### Dimensionality reduction

#### PCA

In [5]:
# PCA on the X data with a predefined number of components (n_components=3).
# The call returns the transformed data together with `ev` and `loadings`;
# with store=True these two are also stored in juspyce_vol.dim_red["ev"] and
# juspyce_vol.dim_red["loadings"].
data_tranformed, ev, loadings = juspyce_vol.transform(
    transform="pca", dataset="X", n_components=3, store=True
)
display(juspyce_vol.transforms["X-pca"], ev, loadings)

INFO:juspyce.api:Calculating pca on 'X' data.
INFO:juspyce.stats:Performing dimensionality reduction using pca (max components: 3, min EV: None).
INFO:juspyce.stats:Returning 3 principal component(s).


Unnamed: 0,LH_Vis_1,LH_Vis_2,LH_Vis_3,LH_Vis_4,LH_Vis_5,LH_Vis_6,LH_Vis_7,LH_Vis_8,LH_Vis_9,LH_SomMot_1,...,PUT-rh,CAU-rh,HIP-lh,AMY-lh,pTHA-lh,aTHA-lh,NAc-lh,GP-lh,PUT-lh,CAU-lh
c0,-0.434868,-0.954757,-1.042387,-0.64664,-1.58558,-1.848065,-1.177946,-0.82641,-1.010176,-1.014182,...,5.192016,5.799974,1.091386,1.846438,4.372342,4.410324,4.311578,4.220963,5.135243,5.615507
c1,1.245912,0.918874,0.270119,3.936763,1.210892,-0.643092,0.246233,1.787727,0.354218,-0.512522,...,-1.884682,0.062297,1.139239,0.711208,1.080048,1.066632,-3.653426,1.574231,-2.095394,0.141226
c2,-1.644314,-0.666005,-0.91142,0.814236,1.492899,1.204068,0.063741,0.675749,0.343611,-0.421786,...,0.830551,-0.611042,-2.134716,-0.502599,-2.033851,-1.879557,1.129982,4.371903,0.778511,-0.823644


c0    0.428652
c1    0.225274
c2    0.147871
dtype: float32

Unnamed: 0,c0,c1,c2
5HT2a-cimbi36-29-beliveau2017,-0.848415,-0.25072,-0.021113
NMDA-ge179-29-galovic2021,0.6404,-0.42896,-0.274238
mGluR5-abp688-73-smart2019,-0.354129,-0.817509,-0.150552
MU-carfentanil-204-kantonen2020,0.634844,-0.527087,-0.149397
GABAa-flumazenil-6-dukart2018,-0.82446,-0.406341,-0.139335
5HT1b-p943-65-gallezot2010,-0.117607,-0.330298,0.91553
D2-raclopride-156-malen2022,0.805384,-0.33121,0.238439


#### ICA

In [6]:
# ICA on the X data with three components. `ev` is returned for ICA as well,
# but contains only NaN in the recorded output of this cell.
# NOTE(review): no seed is set here; if the ICA implementation is stochastic,
# results may vary between runs - confirm.
data_tranformed, ev, loadings = juspyce_vol.transform(
    transform="ica", dataset="X", n_components=3, store=True
)
display(juspyce_vol.transforms["X-ica"], ev, loadings)

INFO:juspyce.api:Calculating ica on 'X' data.
INFO:juspyce.stats:Performing dimensionality reduction using ica (max components: 3, min EV: None).
INFO:juspyce.stats:Returning 3 independent component(s).


Unnamed: 0,LH_Vis_1,LH_Vis_2,LH_Vis_3,LH_Vis_4,LH_Vis_5,LH_Vis_6,LH_Vis_7,LH_Vis_8,LH_Vis_9,LH_SomMot_1,...,PUT-rh,CAU-rh,HIP-lh,AMY-lh,pTHA-lh,aTHA-lh,NAc-lh,GP-lh,PUT-lh,CAU-lh
c0,-0.103681,-0.087691,-0.048368,-0.270699,-0.107503,0.007326,-0.043491,-0.133377,-0.044863,0.007419,...,0.252056,0.127676,-0.064112,-0.007226,0.017334,0.020137,0.350662,0.023524,0.264349,0.116713
c1,0.093036,0.015562,0.007843,0.030798,-0.108844,-0.151543,-0.045995,-0.023106,-0.050011,-0.034261,...,0.12868,0.27253,0.177799,0.118914,0.307827,0.301541,0.033676,0.001016,0.123645,0.277193
c2,-0.11019,-0.054745,-0.089654,0.13123,0.101964,0.033594,-0.018567,0.070097,0.009342,-0.068146,...,0.150069,0.095941,-0.112903,0.021712,-0.026357,-0.013865,0.113634,0.472615,0.140165,0.076802


c0   NaN
c1   NaN
c2   NaN
dtype: float32

Unnamed: 0,c0,c1,c2
5HT2a-cimbi36-29-beliveau2017,-0.147529,-0.73076,-0.476803
NMDA-ge179-29-galovic2021,0.644497,0.499886,-0.06374
mGluR5-abp688-73-smart2019,0.567206,-0.467819,-0.525183
MU-carfentanil-204-kantonen2020,0.739116,0.39594,0.010338
GABAa-flumazenil-6-dukart2018,-0.006137,-0.701024,-0.610565
5HT1b-p943-65-gallezot2010,0.311269,-0.691594,0.621233
D2-raclopride-156-malen2022,0.666229,0.386312,0.471275


#### Factor analysis

In [7]:
# Factor analysis on the X data. min_ev is the minimum explained variance;
# it works for PCA and FA and will overwrite n_components.
data_tranformed, ev, loadings = juspyce_vol.transform(
    transform="fa", dataset="X", n_components=3, min_ev=0.9, store=True
)
display(juspyce_vol.transforms["X-fa"], ev, loadings)

INFO:juspyce.api:Calculating fa on 'X' data.
INFO:juspyce.stats:Performing dimensionality reduction using fa (max components: 7, min EV: 0.9).
INFO:juspyce.stats:Returning 6 factor(s).


Unnamed: 0,LH_Vis_1,LH_Vis_2,LH_Vis_3,LH_Vis_4,LH_Vis_5,LH_Vis_6,LH_Vis_7,LH_Vis_8,LH_Vis_9,LH_SomMot_1,...,PUT-rh,CAU-rh,HIP-lh,AMY-lh,pTHA-lh,aTHA-lh,NAc-lh,GP-lh,PUT-lh,CAU-lh
c0,0.342994,0.835304,0.956099,-0.442902,0.898987,1.317312,0.495793,-0.058211,0.25684,0.42425,...,-2.451515,-3.289781,-0.85651,-0.675089,-2.521035,-2.113833,-1.121392,-2.597839,-2.198707,-3.259078
c1,0.144096,-0.186982,0.359402,-2.350961,-0.749243,0.72324,-0.049305,-0.970915,0.112278,0.285169,...,-0.131865,-0.851011,-0.823761,-0.502856,-2.146401,-2.335408,1.218569,-3.111286,-0.139022,-0.94984
c2,-0.936345,-1.582755,-1.539249,-1.31151,-1.843139,-1.888146,-0.308529,-1.016409,-1.319099,-0.09658,...,2.046769,2.196316,-0.448375,0.873464,1.952206,2.565847,3.216041,0.34431,2.08699,2.113373
c3,0.036789,0.871663,1.283467,-2.40626,-0.17931,1.099121,-0.781776,-1.204076,0.143542,-0.086888,...,1.852346,0.867612,1.367271,0.22159,1.844382,1.455564,1.63119,0.589708,2.121855,1.034465
c4,-0.685217,-0.642567,-0.758988,-0.47269,-0.391219,-0.5889,-0.127881,-0.339873,-0.387074,-0.106403,...,4.166374,3.266621,-0.575622,-0.049804,0.051559,0.289312,3.588616,2.12725,4.322677,2.834279
c5,-0.281956,0.313698,0.232975,-0.013212,1.196265,1.289649,0.257249,0.173152,0.313503,-0.101973,...,1.423392,-0.159477,-1.740924,-0.650902,-3.17758,-2.798597,1.813638,0.99652,1.644972,-0.606916


c0    0.187150
c1    0.139961
c2    0.129882
c3    0.122093
c4    0.085733
c5    0.051892
dtype: float32

Unnamed: 0,c0,c1,c2,c3,c4,c5
5HT2a-cimbi36-29-beliveau2017,0.973762,-0.061019,0.074859,-0.097591,0.055006,-0.02827
NMDA-ge179-29-galovic2021,0.002236,-0.110906,0.127821,0.845569,0.08532,-0.046472
mGluR5-abp688-73-smart2019,0.010817,0.91836,0.102534,-0.115004,0.071669,-0.03868
MU-carfentanil-204-kantonen2020,-0.009184,0.091429,0.895346,0.127413,-0.06981,0.037926
GABAa-flumazenil-6-dukart2018,0.53345,0.32277,-0.262679,0.258413,-0.172441,0.08753
5HT1b-p943-65-gallezot2010,0.001006,-0.024138,0.02644,-0.030047,0.018709,0.588576
D2-raclopride-156-malen2022,-0.277596,0.08482,-0.073291,0.181651,0.741436,0.057138


### Overwrite original dataframe

The original dataframes X and Y can be overwritten with the transformed data by setting `replace=True`.

In [8]:
# Shape of X before it is replaced by the transformed data.
shape_before = juspyce_vol.X.shape
print("Shape original X:", shape_before)

Shape original X: (7, 116)


In [9]:
# Same parcel-wise mean as before, but replace=True additionally overwrites
# juspyce_vol.X with the transformed data. Both frames are shown to compare.
data_tranformed = juspyce_vol.transform(
    transform="mean", dataset="X", store=True, replace=True
)
display(juspyce_vol.transforms["X-mean"], juspyce_vol.X)

INFO:juspyce.api:Calculating parcelwise mean of X.
INFO:juspyce.api:Replacing 'X' data with transformed data.


Unnamed: 0,LH_Vis_1,LH_Vis_2,LH_Vis_3,LH_Vis_4,LH_Vis_5,LH_Vis_6,LH_Vis_7,LH_Vis_8,LH_Vis_9,LH_SomMot_1,...,PUT-rh,CAU-rh,HIP-lh,AMY-lh,pTHA-lh,aTHA-lh,NAc-lh,GP-lh,PUT-lh,CAU-lh
mean,-0.517305,-0.169558,0.014598,-1.304095,-0.144706,0.440864,-0.054003,-0.582674,-0.094969,0.184419,...,0.729475,-0.197622,-0.473498,-0.313305,-0.482039,-0.388886,1.343536,-0.192195,0.87472,-0.23597


Unnamed: 0,LH_Vis_1,LH_Vis_2,LH_Vis_3,LH_Vis_4,LH_Vis_5,LH_Vis_6,LH_Vis_7,LH_Vis_8,LH_Vis_9,LH_SomMot_1,...,PUT-rh,CAU-rh,HIP-lh,AMY-lh,pTHA-lh,aTHA-lh,NAc-lh,GP-lh,PUT-lh,CAU-lh
mean,-0.517305,-0.169558,0.014598,-1.304095,-0.144706,0.440864,-0.054003,-0.582674,-0.094969,0.184419,...,0.729475,-0.197622,-0.473498,-0.313305,-0.482039,-0.388886,1.343536,-0.192195,0.87472,-0.23597


In [10]:
# Shape of X after it has been replaced by the transformed data.
shape_after = juspyce_vol.X.shape
print("Shape new X:", shape_after)

Shape new X: (1, 116)
