# Conduct PROCESS macro analyses in a Jupyter environment
The PROCESS toolbox is also released for R. Running R code inside Jupyter makes it convenient to combine both languages in one notebook. First load the R magic commands with `%load_ext rpy2.ipython`. Then start a cell with `%%R` to mark its content as R code. Use `-i` to pass variables from Python to R and `-o` to pass variables from R back to Python.

In [15]:
# Load rpy2's IPython extension so that the %%R cell magic becomes available.
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [16]:
import os 
import pandas as pd

# Folder that holds PROCESS's `process.R` -- edit this for your own machine.
process_sw_dir = (
    '/Users/melissa/Dokumente/PhD/sw/processv43/PROCESS v4.3 for R'
)

# The current working directory is treated as the project root.
wdir = os.getcwd()

# Analysis output is written to <project root>/example_data;
# the folder is created if it does not exist yet.
out_dir = os.path.join(wdir, 'example_data')
os.makedirs(out_dir, exist_ok=True)

In [9]:
# load example data
# Read the tab-separated thickness table and give its ID column the
# name 'SubjID', which is the key used for merging later on.
ct_lh = pd.read_csv(
    'example_data/aparc_COBRE_lh_CT.txt',
    sep='\t',
).rename(columns={'lh.aparc.thickness': 'SubjID'})

# Preview the first rows.
ct_lh.head()

Unnamed: 0,SubjID,lh_bankssts_thickness,lh_caudalanteriorcingulate_thickness,lh_caudalmiddlefrontal_thickness,lh_cuneus_thickness,lh_entorhinal_thickness,lh_fusiform_thickness,lh_inferiorparietal_thickness,lh_inferiortemporal_thickness,lh_isthmuscingulate_thickness,...,lh_superiorparietal_thickness,lh_superiortemporal_thickness,lh_supramarginal_thickness,lh_frontalpole_thickness,lh_temporalpole_thickness,lh_transversetemporal_thickness,lh_insula_thickness,lh_MeanThickness_thickness,BrainSegVolNotVent,eTIV
0,A00000300,2.4,2.228,2.481,1.861,2.807,2.427,2.324,2.589,2.143,...,2.092,2.645,2.524,2.606,3.784,2.427,2.907,2.38597,1015381.0,1523614.0
1,A00000368,2.511,2.429,2.423,2.011,3.361,2.595,2.423,2.541,2.343,...,2.13,2.662,2.481,2.61,3.749,2.273,2.598,2.38727,1034220.0,1497609.0
2,A00000456,2.218,2.059,2.233,1.659,2.953,2.359,2.108,2.464,2.213,...,1.955,2.459,2.274,2.851,3.095,2.152,2.639,2.22066,1046529.0,1516138.0
3,A00000541,2.354,2.709,2.556,1.759,3.31,2.448,2.29,2.773,2.376,...,1.952,2.31,2.254,3.607,3.203,1.869,3.023,2.35949,793725.0,1091874.0
4,A00000838,2.542,2.372,2.64,1.806,3.217,2.547,2.458,2.469,2.309,...,2.317,2.812,2.579,2.741,3.437,2.467,3.056,2.47454,1278601.0,1738219.0


In [10]:
# load meta information
# Read the comma-separated table with the per-subject meta information.
meta = pd.read_csv(
    'example_data/COBRE_meta.csv',
)

# Preview the first rows (5 is the default of head()).
meta.head(n=5)

Unnamed: 0,SubjID,Dx,Age,Sex,AP,CPZ,AO,DURILL,PANSSTOT,PANSSPOS,PANSSNEG,SAPSTOT,SANSTOT,HAND,PARENTSES,IQ
0,A00000300,0,35,1,,,,,,,,,,0,5.0,114.0
1,A00000368,1,52,1,2.0,50.0,16.0,36.0,63.0,19.0,16.0,,,0,4.0,100.0
2,A00000456,1,53,1,2.0,75.0,16.0,37.0,59.0,11.0,14.0,,,0,4.0,84.0
3,A00000541,1,48,2,,87.1,31.0,17.0,62.0,21.0,9.0,,,0,7.0,99.0
4,A00000838,1,28,1,2.0,1000.0,22.0,6.0,55.0,8.0,18.0,,,0,,108.0


In [12]:
# merge
# Join the thickness table with the meta table on the shared subject ID
# (default inner join: only subjects present in both tables are kept).
data = ct_lh.merge(meta, on='SubjID')

# Preview the first rows of the combined table.
data.head()

Unnamed: 0,SubjID,lh_bankssts_thickness,lh_caudalanteriorcingulate_thickness,lh_caudalmiddlefrontal_thickness,lh_cuneus_thickness,lh_entorhinal_thickness,lh_fusiform_thickness,lh_inferiorparietal_thickness,lh_inferiortemporal_thickness,lh_isthmuscingulate_thickness,...,AO,DURILL,PANSSTOT,PANSSPOS,PANSSNEG,SAPSTOT,SANSTOT,HAND,PARENTSES,IQ
0,A00000300,2.4,2.228,2.481,1.861,2.807,2.427,2.324,2.589,2.143,...,,,,,,,,0,5.0,114.0
1,A00000368,2.511,2.429,2.423,2.011,3.361,2.595,2.423,2.541,2.343,...,16.0,36.0,63.0,19.0,16.0,,,0,4.0,100.0
2,A00000456,2.218,2.059,2.233,1.659,2.953,2.359,2.108,2.464,2.213,...,16.0,37.0,59.0,11.0,14.0,,,0,4.0,84.0
3,A00000541,2.354,2.709,2.556,1.759,3.31,2.448,2.29,2.773,2.376,...,31.0,17.0,62.0,21.0,9.0,,,0,7.0,99.0
4,A00000838,2.542,2.372,2.64,1.806,3.217,2.547,2.458,2.469,2.309,...,22.0,6.0,55.0,8.0,18.0,,,0,,108.0


In [20]:
# filter for patients only
# Keep only the rows with Dx == 1 (the patient group).
is_patient = data['Dx'] == 1
data_scz = data.loc[is_patient]

# Report (rows, columns) of the subset, then preview it.
print(data_scz.shape)
data_scz.head()

(60, 53)


Unnamed: 0,SubjID,lh_bankssts_thickness,lh_caudalanteriorcingulate_thickness,lh_caudalmiddlefrontal_thickness,lh_cuneus_thickness,lh_entorhinal_thickness,lh_fusiform_thickness,lh_inferiorparietal_thickness,lh_inferiortemporal_thickness,lh_isthmuscingulate_thickness,...,AO,DURILL,PANSSTOT,PANSSPOS,PANSSNEG,SAPSTOT,SANSTOT,HAND,PARENTSES,IQ
1,A00000368,2.511,2.429,2.423,2.011,3.361,2.595,2.423,2.541,2.343,...,16.0,36.0,63.0,19.0,16.0,,,0,4.0,100.0
2,A00000456,2.218,2.059,2.233,1.659,2.953,2.359,2.108,2.464,2.213,...,16.0,37.0,59.0,11.0,14.0,,,0,4.0,84.0
3,A00000541,2.354,2.709,2.556,1.759,3.31,2.448,2.29,2.773,2.376,...,31.0,17.0,62.0,21.0,9.0,,,0,7.0,99.0
4,A00000838,2.542,2.372,2.64,1.806,3.217,2.547,2.458,2.469,2.309,...,22.0,6.0,55.0,8.0,18.0,,,0,,108.0
5,A00000909,2.434,2.334,2.547,1.869,3.367,2.605,2.364,2.601,2.383,...,18.0,9.0,74.0,15.0,18.0,,,0,4.0,90.0


# Moderation analysis
Use `-i` to pass every Python variable that the R cell needs. Otherwise, R doesn't "know" the variables that you have defined in Python.

Example question: how does SES (`PARENTSES`) moderate the relationship between inferior temporal thickness (`lh_inferiortemporal_thickness`) and IQ?

In [26]:
%%R -i data_scz -i process_sw_dir -i out_dir -i wdir

# Moderation analysis with the PROCESS macro (model = 1).
# Variables pushed in from Python via -i:
#   data_scz       data frame containing the columns named below
#   process_sw_dir folder that contains process.R
#   out_dir        folder the text output is written to
#   wdir           project working directory

# Load the PROCESS script. With chdir = TRUE, source() switches into the
# script's folder only while the file is evaluated and switches back
# afterwards -- also when sourcing fails -- so the session is never left
# stranded in process_sw_dir.
source(file.path(process_sw_dir, "process.R"), chdir = TRUE)

# Make sure relative paths resolve against the project directory from here on.
setwd(wdir)

# Moderation model: does PARENTSES (w) moderate the relationship between
# lh_inferiortemporal_thickness (x) and IQ (y)?
output_file <- file.path(
  out_dir, "moderation_mdl1_inferiortemporal-SES-IQ.txt"
)

# Divert everything PROCESS prints into the output file. The diversion is
# released in `finally`, so an error inside process() cannot leave the sink
# open and silently swallow the output of later cells.
sink(output_file)
tryCatch(
  process(
    data = data_scz,
    y = "IQ",
    x = "lh_inferiortemporal_thickness",
    w = "PARENTSES",
    model = 1,
    boot = 5000,
    conf = 95,
    center = FALSE,
    plot = 1,
    save = 1,
    seed = 1234 # fixed seed; presumably makes resampling reproducible
  ),
  finally = sink()
)

# End on an invisible value so the cell itself displays nothing.
invisible(NULL)
