In [8]:
# Pair of coordinate column names, used below to demonstrate tuple indexing.
mytuple = "x_centroid", "y_centroid"

In [9]:
mytuple[0]

'x_centroid'

In [10]:
mytuple[1]

'y_centroid'

In [1]:
from perseuspy import pd

In [2]:
import opendvp

In [3]:
# Read the Perseus text export from the test data folder into `adata`.
# NOTE(review): n_var_metadata_rows=5 presumably tells the importer how many
# annotation rows the Perseus header carries — confirm against the opendvp docs.
adata = opendvp.io.import_perseus(
    path_to_perseus_txt="test_data/io/Perseus_v1.6.15.0.txt",
    n_var_metadata_rows=5
)

[32m10:58:40.67[0m | [1mINFO[0m | Reading Perseus file from: test_data/io/Perseus_v1.6.15.0.txt
[32m10:58:40.69[0m | [1mINFO[0m | Perseus DataFrame shape: (3526, 16)
[32m10:58:40.70[0m | [1mINFO[0m | Data matrix shape: (11, 3526)
[32m10:58:40.71[0m | [32m[1mSUCCESS[0m | AnnData object created from Perseus file.




In [4]:
adata.shape

(11, 3526)

In [5]:
adata.obs.columns.tolist()

['Column Name',
 'Heart_Condition',
 'Ischemia_region',
 'Sample_type',
 'Replicate']

In [7]:
# Load the thresholds test table (columns: sample_id, marker_id, gate_value).
df = pd.read_csv("test_data/io/thresholds.csv")

In [8]:
df

Unnamed: 0,sample_id,marker_id,gate_value
0,991,mean_Vimentin,574.380913
1,991,mean_CD3e,350.0
2,991,mean_panCK,50.0
3,991,mean_CD8,1200.0
4,991,mean_COL1A1,1320.0
5,991,mean_CD20,800.0
6,991,mean_CD68,271.581386
7,991,mean_Ki67,18.105061


In [9]:
df.dtypes

sample_id       int64
marker_id      object
gate_value    float64
dtype: object

In [45]:
# Toy quantification table: three cells, ten morphology/position columns
# followed by two marker-intensity columns.
data = dict(
    CellID=[0, 1, 2],
    Y_centroid=[10.0, 20.0, 30.0],
    X_centroid=[15.0, 25.0, 35.0],
    Area=[100, 150, 120],
    MajorAxisLength=[12, 14, 13],
    MinorAxisLength=[8, 9, 10],
    Eccentricity=[0.5, 0.6, 0.7],
    Orientation=[0, 45, 90],
    Extent=[0.8, 0.85, 0.9],
    Solidity=[0.95, 0.96, 0.97],
    mean_CD3=[1.0, 2.0, 3.0],
    mean_CD8=[0.5, 0.8, 1.1],
)
df = pd.DataFrame(data=data)

In [46]:
df

Unnamed: 0,CellID,Y_centroid,X_centroid,Area,MajorAxisLength,MinorAxisLength,Eccentricity,Orientation,Extent,Solidity,mean_CD3,mean_CD8
0,0,10.0,15.0,100,12,8,0.5,0,0.8,0.95,1.0,0.5
1,1,20.0,25.0,150,14,9,0.6,45,0.85,0.96,2.0,0.8
2,2,30.0,35.0,120,13,10,0.7,90,0.9,0.97,3.0,1.1


In [49]:
# Write the toy table to "test_3.csv" in the working directory, leaving out the
# row index so the file holds only the columns defined above.
df.to_csv(path_or_buf="test_3.csv", index=False)

In [50]:
df.columns

Index(['CellID', 'Y_centroid', 'X_centroid', 'Area', 'MajorAxisLength',
       'MinorAxisLength', 'Eccentricity', 'Orientation', 'Extent', 'Solidity',
       'mean_CD3', 'mean_CD8'],
      dtype='object')

In [51]:
# Per-cell metadata columns; every other column is treated as a measurement.
meta_columns = [
    'CellID',
    'Y_centroid',
    'X_centroid',
    'Area',
    'MajorAxisLength',
    'MinorAxisLength',
    'Eccentricity',
    'Orientation',
    'Extent',
    'Solidity',
]

In [52]:
# Independent copy of the metadata columns, in the order given by meta_columns.
metadata = df.loc[:, meta_columns].copy()
metadata

Unnamed: 0,CellID,Y_centroid,X_centroid,Area,MajorAxisLength,MinorAxisLength,Eccentricity,Orientation,Extent,Solidity
0,0,10.0,15.0,100,12,8,0.5,0,0.8,0.95
1,1,20.0,25.0,150,14,9,0.6,45,0.85,0.96
2,2,30.0,35.0,120,13,10,0.7,90,0.9,0.97


In [53]:
# Everything that is not a metadata column, as an independent copy.
data = df.drop(labels=meta_columns, axis="columns").copy()
data

Unnamed: 0,mean_CD3,mean_CD8
0,1.0,0.5
1,2.0,0.8
2,3.0,1.1


In [54]:
# Column-less frame whose index is the measurement column names.
marker_names = data.columns
variables = pd.DataFrame(index=marker_names)
variables

mean_CD3
mean_CD8


In [58]:
# Convert the toy CSV written earlier into an AnnData object.
# NOTE(review): this call sits above the cell that defines quant_to_adata, so on a
# fresh kernel (Restart & Run All) it would fail unless the function is defined or
# imported earlier than what is visible here — confirm and reorder if needed.
adata = quant_to_adata("test_3.csv")

[32m11:59:45.87[0m | [1mINFO[0m |  3 cells and 2 variables


In [59]:
adata.obs

Unnamed: 0,CellID,Y_centroid,X_centroid,Area,MajorAxisLength,MinorAxisLength,Eccentricity,Orientation,Extent,Solidity
0,1,10.0,15.0,100,12,8,0.5,0,0.8,0.95
1,2,20.0,25.0,150,14,9,0.6,45,0.85,0.96
2,3,30.0,35.0,120,13,10,0.7,90,0.9,0.97


In [60]:
adata.var

mean_CD3
mean_CD8


In [37]:
adata.var.values

array([], shape=(3, 0), dtype=float64)

In [38]:
adata.X

array([[0. , 1. , 0.5],
       [1. , 2. , 0.8],
       [2. , 3. , 1.1]])

In [61]:
import anndata as ad
import pandas as pd

from opendvp.utils import logger

# TODO: quant_to_adata is not general enough yet; it fails on exemplar001.

def quant_to_adata(
        path: str,
        index_into_1_based : str | None = "CellID",
        meta_columns : list | None = None,
        ) -> ad.AnnData:
    """Convert a cell quantification CSV file into an AnnData object.

    The CSV is read with pandas and split column-wise: the columns listed in
    ``meta_columns`` become ``.obs``, every remaining column becomes one
    variable, and the values of those remaining columns form ``.X``.

    Parameters
    ----------
    path : str
        Path to the CSV file containing cell quantification data. Must end in
        ``.csv``. Path-like objects are accepted as well; they are converted
        with ``str()`` for the extension check.
    index_into_1_based : str | None
        Name of a column that is cast to ``int`` and checked for zeros. If any
        value is 0, 1 is added to every value in that column so that cell
        indices match segmentation mask values. If None, no modification is
        performed.
    meta_columns : list | None
        Names of the columns to store in ``.obs``. If None (or empty), the
        default set is used: 'CellID', 'Y_centroid', 'X_centroid', 'Area',
        'MajorAxisLength', 'MinorAxisLength', 'Eccentricity', 'Orientation',
        'Extent', 'Solidity'.

    Returns:
    -------
    ad.AnnData
        AnnData object with the metadata columns in `.obs`, the remaining
        columns' values in `.X`, and a column-less `.var` indexed by the
        remaining column names.

    Raises:
    ------
    ValueError
        If ``path`` does not end in ``.csv``, if any of ``meta_columns`` is
        missing from the file, or if ``index_into_1_based`` names a column that
        is not in the file.

    Examples:
    --------
    >>> from opendvp.io import quant_to_adata
    >>> adata = quant_to_adata('my_quantification.csv')
    >>> print(adata)
    AnnData object with n_obs * n_vars = ...
    >>> adata.obs.head()
    >>> adata.var.head()

    Notes:
    ------
    - All columns not listed in ``meta_columns`` are passed to `.X` unchanged.
    - The function logs the number of cells and variables loaded.
    """
    # str() keeps the extension check working for pathlib.Path inputs too.
    if not str(path).endswith('.csv'):
        raise ValueError("The file should be a csv file")
    quant_data = pd.read_csv(path)
    # Row labels are turned into strings; they end up as the `.obs` index.
    quant_data.index = quant_data.index.astype(str)

    if not meta_columns:
        meta_columns = ['CellID', 'Y_centroid', 'X_centroid', 'Area', 'MajorAxisLength',
                        'MinorAxisLength', 'Eccentricity', 'Orientation', 'Extent', 'Solidity']
    # Collect the missing names so the error tells the user what to fix.
    missing_columns = [column for column in meta_columns if column not in quant_data.columns]
    if missing_columns:
        raise ValueError(
            f"Not all metadata columns are present in the csv file, missing: {missing_columns}"
        )

    if index_into_1_based:
        # A custom index column may not be part of meta_columns, so check it separately
        # instead of letting the lookup below fail with a bare KeyError.
        if index_into_1_based not in quant_data.columns:
            raise ValueError(f"Column '{index_into_1_based}' is not present in the csv file")
        quant_data[index_into_1_based] = quant_data[index_into_1_based].astype(int)
        if (quant_data[index_into_1_based] == 0).any():
            logger.info(f"Detected 0 in '{index_into_1_based}' — shifting all values by +1 for 1-based indexing.")
            quant_data[index_into_1_based] = quant_data[index_into_1_based] + 1

    # Split after the optional shift so `.obs` carries the adjusted index values.
    metadata = quant_data[meta_columns].copy()
    data = quant_data.drop(columns=meta_columns).copy()
    variables = pd.DataFrame(index=data.columns)

    adata = ad.AnnData(X=data.values, obs=metadata, var=variables)
    logger.info(f" {adata.shape[0]} cells and {adata.shape[1]} variables")
    return adata

In [62]:
# Run the converter on the quantification CSV from the test data folder,
# using the default index column and metadata columns.
adata = quant_to_adata("test_data/io/quant.csv")

[32m12:07:43.27[0m | [1mINFO[0m |  9711 cells and 12 variables


In [64]:
adata.shape

(9711, 12)

In [67]:
adata.obs.shape

(9711, 10)

In [4]:
import geopandas as gpd
import pandas as pd

In [5]:

# Toy quantification table: three cells, ten morphology/position columns
# followed by two marker-intensity columns.
data = dict(
    CellID=[0, 1, 2],
    Y_centroid=[10.0, 20.0, 30.0],
    X_centroid=[15.0, 25.0, 35.0],
    Area=[100, 150, 120],
    MajorAxisLength=[12, 14, 13],
    MinorAxisLength=[8, 9, 10],
    Eccentricity=[0.5, 0.6, 0.7],
    Orientation=[0, 45, 90],
    Extent=[0.8, 0.85, 0.9],
    Solidity=[0.95, 0.96, 0.97],
    mean_CD3=[1.0, 2.0, 3.0],
    mean_CD8=[0.5, 0.8, 1.1],
)
df = pd.DataFrame(data=data)

In [7]:
df.columns

Index(['CellID', 'Y_centroid', 'X_centroid', 'Area', 'MajorAxisLength',
       'MinorAxisLength', 'Eccentricity', 'Orientation', 'Extent', 'Solidity',
       'mean_CD3', 'mean_CD8'],
      dtype='object')