# In [None]:
# Example: Loading an RData object into Python with rpy2
# ------------------------------------------------------
# Prerequisites:
# 1. Install R (https://www.r-project.org/)
# 2. Install rpy2 in your Python environment:
#      pip install rpy2
# 3. (Optional) Create a dedicated virtualenv or conda env for reproducibility.

# ---- User-Configurable Section ----
# Path to the directory containing your .RData file
rdata_dir = "/data"
# Name of the .RData file (resolved relative to `rdata_dir`)
rdata_file = "datasets_from_repo.RData"
# Name of the variable/object inside .RData
namedobject = "microbialscalerepository"
# NOTE: do not call os.chdir() here. `os` is only imported in the imports
# section further down, so a call at this point raises NameError on a fresh
# run. The working directory is switched to `rdata_dir` in the main loading
# logic, after the directory has been checked for existence.

# ---- Imports ----
import os
from rpy2.robjects import r, globalenv
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter
from rpy2.robjects.vectors import ListVector
from rpy2.robjects import default_converter
from rpy2.robjects import numpy2ri
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import Converter

# Activate pandas <-> R dataframe conversion
# This is called once at import time, before any R object is retrieved, so
# the lookups performed later in this script (e.g. `globalenv[...]`) run with
# the pandas conversion rules switched on.
# NOTE(review): newer rpy2 releases appear to deprecate the global
# `activate()` switch in favour of scoped `localconverter(...)` blocks --
# confirm against the installed rpy2 version before relying on this call.
pandas2ri.activate()

# ---- Helper function: Recursive conversion of R lists to Python dicts ----
def r_to_py(obj):
    """
    Recursively convert an R object into native Python types.

    Conversion rules, applied in order:

    1. A plain R list (a ``ListVector`` that is *not* a data.frame) whose
       elements are named becomes a ``dict`` keyed by element name. An
       element with a blank name is keyed by its 1-based position (as a
       string) so that it is not lost.
    2. A plain R list without names becomes a Python ``list`` holding the
       converted elements in order.
    3. Anything else -- including data.frames -- is passed to the pandas
       converter, then to the numpy converter. If both fail, the object is
       returned unchanged.

    Args:
        obj: The object to convert; typically an rpy2 wrapper around an R
            value, but any Python object is accepted and falls through to
            rule 3.

    Returns:
        A ``dict`` or ``list`` for R lists; otherwise whatever the pandas or
        numpy converter produced, or ``obj`` itself when neither applies.
    """
    # Imported locally so this helper only relies on names it brings into
    # scope itself, not on changes to the file-level import block.
    from rpy2.robjects.vectors import DataFrame

    # rpy2's DataFrame is a subclass of ListVector. It must be excluded here,
    # otherwise a data.frame would be split into a dict of columns instead of
    # reaching the pandas conversion below.
    if isinstance(obj, ListVector) and not isinstance(obj, DataFrame):
        try:
            names = [str(name) for name in obj.names]
        except TypeError:
            # An unnamed R list reports its names as R NULL (or None), which
            # cannot be iterated as a sequence of names.
            names = []

        n_items = len(obj)

        # Unnamed list: keep the elements, in order, as a Python list.
        # rx2() uses R-style indexing, so positions start at 1.
        if n_items and not names:
            return [r_to_py(obj.rx2(pos)) for pos in range(1, n_items + 1)]

        py_dict = {}
        for pos, name in enumerate(names, start=1):
            # Fetch by position rather than by name: a name lookup returns
            # the first match only and cannot address blank names.
            key = name if name else str(pos)
            py_dict[key] = r_to_py(obj.rx2(pos))
        return py_dict

    # Best-effort: try the pandas conversion (handles data.frames). Any
    # failure here simply means "not convertible this way", so move on.
    try:
        with localconverter(default_converter + pandas2ri.converter):
            return pandas2ri.rpy2py(obj)
    except Exception:
        pass

    # Best-effort: try the numpy conversion, otherwise return the object
    # untouched so the caller still receives something usable.
    try:
        with localconverter(default_converter + numpy2ri.converter):
            return numpy2ri.rpy2py(obj)
    except Exception:
        return obj

# ---- Main loading logic ----

# 1) Ensure the RData directory exists
if not os.path.isdir(rdata_dir):
    raise FileNotFoundError(f"Directory not found: {rdata_dir}")

# 2) Change into the directory so that `rdata_file` resolves relative to it
os.chdir(rdata_dir)

# 3) Load the RData file into R's global environment.
#    Check for the file first so a missing file is reported as a plain
#    Python FileNotFoundError instead of an error raised from inside R.
if not os.path.isfile(rdata_file):
    raise FileNotFoundError(
        f"RData file not found: {os.path.join(rdata_dir, rdata_file)}"
    )
r['load'](rdata_file)

# 4) Retrieve the object whose name is configured in `namedobject`.
#    The message reports the configured name, so it stays accurate when
#    `namedobject` is changed in the user-configurable section.
if namedobject not in globalenv:
    raise KeyError(f"'{namedobject}' not found in RData.\n"
                   "Check that the .RData file defines this object.")
repo_r = globalenv[namedobject]

# 5) Convert to Python native structure
microbialscalerepository = r_to_py(repo_r)

# 6) Inspect the top-level keys (only meaningful when the conversion
#    produced a dict; otherwise report what was obtained instead of failing)
if isinstance(microbialscalerepository, dict):
    print("Loaded studies:", list(microbialscalerepository.keys()))
else:
    print("Loaded object of type:", type(microbialscalerepository).__name__)

# Now `microbialscalerepository` is a nested dict of study entries,
# with any tax data.frames converted to pandas.DataFrame objects.
