# Using R to create Normalizations for gsforge

This notebook provides an example of piping selected data from `gsforge` to an R session, then back to Python to store and view the results with `gsforge`.

In [None]:
import xarray as xr

import GSForge as gsf
import holoviews as hv
# Select bokeh as the holoviews plotting extension for this notebook.
hv.extension('bokeh')

***R integration setup***

In [None]:
import rpy2.rinterface_lib.callbacks
import logging
from rpy2.robjects import pandas2ri
%load_ext rpy2.ipython
pandas2ri.activate()
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR) # Supresses verbose R output.

In [None]:
%%R
# Attach edgeR in the embedded R session; it is the only R package this
# notebook loads before calling DGEList(), calcNormFactors() and cpm().
library("edgeR")

***Declare paths used***

In [None]:
# OS-independent path management.
from os import fspath, environ
from pathlib import Path

# Root of the demo data: read from $GSFORGE_DEMO_DATA when it is set, otherwise
# fall back to ~/GSForge_demo_data ("~" is expanded to the user's home directory).
OSF_PATH = Path(environ.get("GSFORGE_DEMO_DATA", default="~/GSForge_demo_data")).expanduser()
AGEM_PATH = OSF_PATH.joinpath("osfstorage", "rice.nc")

# Fail early, and say which path was checked, when the demo data is missing.
assert AGEM_PATH.exists(), (
    f"Demo data not found at {AGEM_PATH}. Set the GSFORGE_DEMO_DATA environment "
    "variable to the directory that contains 'osfstorage/rice.nc'."
)

***Load an AnnotatedGEM***

In [None]:
# Build the AnnotatedGEM from the file at AGEM_PATH; the bare name on the last
# line makes the object the cell's displayed output.
agem = gsf.AnnotatedGEM(AGEM_PATH)
agem

***Select counts using `get_gem_data()`***

In [None]:
# get_gem_data() is unpacked as a pair here; only the first element (the counts)
# is used, the second is discarded.
# NOTE(review): binding the discarded value to `_` shadows IPython's
# "last output" variable for the rest of the session.
counts, _ = gsf.get_gem_data(agem)

***Prepare the counts for R***

Notice that after this step the counts are transposed into the form more common in R (features by samples).

In [None]:
# Convert the selected counts into the object that is handed to R below.
ri_counts = gsf.utils.R_interface.Py_counts_to_R(counts)
# Display the shape to check the orientation of the converted counts.
ri_counts.shape

***Run the normalization within R***

In [None]:
%%R -i ri_counts -o tmm_counts
# -i passes the Python variable `ri_counts` into R; -o passes the R variable
# `tmm_counts` back to Python once the cell has finished.

# Wrap the raw counts in an edgeR DGEList.
dge_list <- DGEList(counts=ri_counts)
# Compute normalization factors with the TMM method and store them on the DGEList.
dge_list <- calcNormFactors(dge_list, method="TMM")
# Counts per million using the normalized library sizes, on the linear (non-log) scale.
tmm_counts <- cpm(dge_list, normalized.lib.sizes=TRUE, log=FALSE)

In [None]:
# The R result is transposed back so that its axes line up with `counts`.
# The dimension names are passed explicitly: without `dims`, xarray has to infer
# them from the coordinate mapping, which some versions reject and which breaks
# when `counts` carries extra non-dimension coordinates.
# NOTE: this rebinds `tmm_counts` (the raw R output) to the DataArray, so re-run
# the R cell above before re-running this cell.
tmm_counts = xr.DataArray(tmm_counts.T, dims=counts.dims, coords=counts.coords, name='tmm_counts')
tmm_counts

***Add the counts to the GEM .data attribute.***

In [None]:
# Store the normalized counts on the AnnotatedGEM's data under the key 'tmm_counts'.
agem.data['tmm_counts'] = tmm_counts