## Basic AnnData Example

Data type used in many Python 2D/3D models. The example below only includes `obs` (1D annotations along the observation axis) and `var` (1D annotations along the other, variable axis).

- Obs - Basically row definitions (one entry per cell)
- Var - Basically column definitions (one entry per gene)
- Obsm - Additional information for each row (cell). This can include things like X,Y coordinates or a UMAP embedding.

In [1]:
import numpy as np
import pandas as pd
import anndata as ad

# Fixed seed so that Restart Kernel -> Run All reproduces the same matrix.
# NOTE: outputs saved from earlier, unseeded runs will not match these values.
SEED = 0
N_CELLS = 100
N_GENES = 50
rng = np.random.default_rng(SEED)

# Data matrix: uniform random values in [0, 1), one row per cell, one column per gene
X = rng.random((N_CELLS, N_GENES))  # 100 cells × 50 genes

# Cell annotations: first half of the cells is condition "A", the remainder "B"
n_condition_a = N_CELLS // 2
obs = pd.DataFrame(index=[f"cell_{i}" for i in range(N_CELLS)],
                   data={"condition": ["A"] * n_condition_a + ["B"] * (N_CELLS - n_condition_a)})

# Gene annotations
var = pd.DataFrame(index=[f"gene_{j}" for j in range(N_GENES)],
                   data={"gene_name": [f"G{j}" for j in range(N_GENES)]})

# Create AnnData: rows of X are labelled by obs.index, columns by var.index
adata = ad.AnnData(X=X, obs=obs, var=var)

# Last expression of the cell: show the stored matrix
adata.X

array([[0.04707195, 0.80262355, 0.55309914, ..., 0.67309182, 0.3058146 ,
        0.73755158],
       [0.91335509, 0.04248213, 0.98584067, ..., 0.90296656, 0.19048773,
        0.03456312],
       [0.85150484, 0.67119201, 0.47867869, ..., 0.1525673 , 0.65019522,
        0.84462351],
       ...,
       [0.81275864, 0.59389841, 0.34789703, ..., 0.70254052, 0.99744061,
        0.65193951],
       [0.96476843, 0.82118161, 0.96255264, ..., 0.20785208, 0.82858218,
        0.33560133],
       [0.43437356, 0.77906315, 0.45454722, ..., 0.1538259 , 0.68257826,
        0.68961228]])

In [2]:
import pandas as pd

# Preview the top-left 10 x 10 corner of the matrix as a labelled table.
# Sparse matrices expose .toarray() and must be densified first; a dense
# NumPy array is passed through unchanged.
corner_block = adata.X[:10, :10]
if hasattr(adata.X, "toarray"):
    corner_block = corner_block.toarray()

PandaTable = pd.DataFrame(
    corner_block,
    index=adata.obs_names[:10],
    columns=adata.var_names[:10],
)

display(PandaTable.head(15))

# Last expression of the cell: the AnnData summary repr
adata

Unnamed: 0,gene_0,gene_1,gene_2,gene_3,gene_4,gene_5,gene_6,gene_7,gene_8,gene_9
cell_0,0.047072,0.802624,0.553099,0.737029,0.34831,0.91053,0.665998,0.520565,0.168729,0.944633
cell_1,0.913355,0.042482,0.985841,0.056577,0.539268,0.257945,0.572901,0.577609,0.125839,0.295351
cell_2,0.851505,0.671192,0.478679,0.973596,0.624749,0.102112,0.053866,0.355209,0.933577,0.443353
cell_3,0.823509,0.540136,0.671081,0.231129,0.63334,0.604117,0.694153,0.307645,0.054002,0.992864
cell_4,0.66342,0.503092,0.597177,0.50031,0.606909,0.646588,0.029676,0.038586,0.619704,0.065684
cell_5,0.210518,0.65168,0.57735,0.197925,0.637754,0.719455,0.176968,0.928042,0.769201,0.625183
cell_6,0.218319,0.406706,0.544448,0.256722,0.304608,0.631158,0.315543,0.391949,0.579092,0.586862
cell_7,0.259602,0.512193,0.456245,0.533935,0.622699,0.0843,0.723764,0.272233,0.847849,0.075114
cell_8,0.415282,0.862276,0.283656,0.627566,0.72315,0.302237,0.091406,0.921251,0.280521,0.415031
cell_9,0.579945,0.694061,0.851499,0.691159,0.265203,0.453549,0.850299,0.692048,0.69138,0.9482


AnnData object with n_obs × n_vars = 100 × 50
    obs: 'condition'
    var: 'gene_name'

In [3]:
def get_anndata_value_info(adata, cell_name, gene_name):
    """
    Look up a single matrix entry of an AnnData object along with its annotations.

    Parameters:
    - adata: AnnData object
    - cell_name: str, row label to look up (checked against adata.obs_names)
    - gene_name: str, column label to look up (checked against adata.var_names)

    Returns:
    - dict with three keys:
        "expression_value": the matrix entry for (cell_name, gene_name)
        "cell_metadata": the adata.obs row for cell_name
        "gene_metadata": the adata.var row for gene_name

    Raises:
    - ValueError if cell_name or gene_name is not a known label
      (the cell is checked first).
    """
    cell_known = cell_name in adata.obs_names
    if not cell_known:
        raise ValueError(f"Cell '{cell_name}' not found in adata.obs_names")
    gene_known = gene_name in adata.var_names
    if not gene_known:
        raise ValueError(f"Gene '{gene_name}' not found in adata.var_names")

    # Subset to the requested cell/gene pair; .X of that subset is indexed
    # as a 2-D container below. Sparse containers are densified first.
    entry = adata[cell_name, gene_name].X
    dense_entry = entry.toarray() if hasattr(entry, "toarray") else entry
    value = dense_entry[0, 0]

    # Annotation rows for the same labels
    return dict(
        expression_value=value,
        cell_metadata=adata.obs.loc[cell_name],
        gene_metadata=adata.var.loc[gene_name],
    )


# Example lookup: one cell/gene pair from the AnnData object built above
result = get_anndata_value_info(adata, "cell_2", "gene_3")

# Print each part of the result under its own heading
for heading, key in (
    ("Expression value:", "expression_value"),
    ("\nCell metadata:\n", "cell_metadata"),
    ("\nGene metadata:\n", "gene_metadata"),
):
    print(heading, result[key])

Expression value: 0.9735963247244048

Cell metadata:
 condition    A
Name: cell_2, dtype: object

Gene metadata:
 gene_name    G3
Name: gene_3, dtype: object


## AnnData Object Build from Seurat

Attempt to build an AnnData object from data exported from Seurat as CSV files (expression matrix, cell metadata, and UMAP coordinates).

In [22]:
import scanpy as sc
import pandas as pd
import numpy as np

from pathlib import Path

# Folder holding the exported CSV files. Edit this one constant (rather than
# every read_csv call) to run the notebook on another machine.
DATA_DIR = Path(r"C:\Users\evanj\OneDrive\Documents")

# Load files
# expression.csv is transposed after loading so that its rows become the
# observations: expr.index is used as obs_names and expr.columns as var_names.
expr_orig = pd.read_csv(DATA_DIR / "expression.csv", index_col=0)
expr = expr_orig.transpose()
metadata = pd.read_csv(DATA_DIR / "metadata.csv", index_col=0)
umap = pd.read_csv(DATA_DIR / "umap.csv", index_col=0)

# Quick look at the per-cell pixel coordinates
XYmetadata = metadata[['x_FOV_px', 'y_FOV_px']]
display(XYmetadata.tail(15))

# Align the per-cell tables to the expression rows BY CELL ID.
# Attaching them positionally (plain .values) would silently pair annotations
# with the wrong cells whenever the CSV files are not in the same row order.
cell_ids = expr.index
for table_name, table in (("metadata", metadata), ("umap", umap)):
    absent = cell_ids.difference(table.index)
    if len(absent) > 0:
        raise ValueError(
            f"{len(absent)} cell(s) in expression.csv are missing from "
            f"{table_name}.csv, e.g. {list(absent[:5])}"
        )
metadata_aligned = metadata.loc[cell_ids]
umap_aligned = umap.loc[cell_ids]

# Create AnnData object
adata = sc.AnnData(X=expr.values)

# Assign metadata (rows already ordered like expr.index)
adata.obs = metadata_aligned
adata.var_names = expr.columns
adata.obs_names = cell_ids

# Add spatial coordinates and UMAP to .obsm
adata.obsm["spatial"] = metadata_aligned[['x_FOV_px', 'y_FOV_px']].values  # adjust if needed
adata.obsm["X_umap"] = umap_aligned.values

# Optional: save the AnnData object
# adata.write("spatial_data.h5ad")

Unnamed: 0,x_FOV_px,y_FOV_px
c_2_23_636,3642,1869
c_2_23_658,3392,1898
c_2_23_702,1327,1949
c_2_23_727,3080,1983
c_2_23_756,3880,2002
c_2_23_771,3558,2007
c_2_23_796,3831,2028
c_2_23_811,3556,2037
c_2_23_826,4182,2062
c_2_23_84,3947,560
