## Basic AnnData Example

Data type used in many Python 2D/3D models. The example below only includes obs (1-D annotations of the observations, of length n_obs) and var (1-D annotations in the other direction, of length n_vars).

Obs - Basically row definitions (one entry per cell)
Var - Basically column definitions
Obsm - Additional information for each row (cell). This can include things like X,Y coordinates or a UMAP embedding.

In [1]:
import numpy as np
import pandas as pd
import anndata as ad

# Reproducibility: draw the demo matrix from a seeded generator so that
# Restart Kernel -> Run All produces the same values every time.
SEED = 0
N_CELLS, N_GENES = 100, 50
rng = np.random.default_rng(SEED)

# Data matrix: uniform values in [0, 1), one row per cell, one column per gene
X = rng.random((N_CELLS, N_GENES))  # 100 cells × 50 genes

# Cell annotations: the first half of the cells is condition "A", the rest "B"
n_condition_a = N_CELLS // 2
obs = pd.DataFrame(
    index=[f"cell_{i}" for i in range(N_CELLS)],
    data={"condition": ["A"] * n_condition_a + ["B"] * (N_CELLS - n_condition_a)},
)

# Gene annotations: one human-readable name per gene
var = pd.DataFrame(
    index=[f"gene_{j}" for j in range(N_GENES)],
    data={"gene_name": [f"G{j}" for j in range(N_GENES)]},
)

# Assemble the AnnData container: X is the data matrix, obs carries the
# per-cell annotations (rows of X) and var the per-gene annotations (columns).
adata = ad.AnnData(
    X=X,
    obs=obs,
    var=var,
)

# Last expression of the cell: echo the stored matrix as the cell output.
adata.X

array([[0.58463654, 0.73223219, 0.4086174 , ..., 0.76058323, 0.82820274,
        0.92518991],
       [0.65696904, 0.86571114, 0.86181572, ..., 0.51373369, 0.40846295,
        0.88271928],
       [0.84587165, 0.89457477, 0.72319442, ..., 0.27548652, 0.51826854,
        0.28405957],
       ...,
       [0.33632241, 0.75754919, 0.63938208, ..., 0.42761196, 0.66514817,
        0.48257019],
       [0.36183078, 0.9115818 , 0.12558265, ..., 0.49224861, 0.33600905,
        0.00677086],
       [0.30767906, 0.74276818, 0.1733098 , ..., 0.06722237, 0.42772522,
        0.02448283]])

In [15]:
import pandas as pd

# Preview the top-left corner of the expression matrix as a labelled table.
# The slice is taken once, before any densifying, so only the small corner
# is ever converted when X is stored in a sparse format.
PREVIEW_SIZE = 10
corner = adata.X[:PREVIEW_SIZE, :PREVIEW_SIZE]
if hasattr(corner, "toarray"):  # e.g. a scipy sparse matrix
    corner = corner.toarray()

PandaTable = pd.DataFrame(
    corner,
    index=adata.obs_names[:PREVIEW_SIZE],
    columns=adata.var_names[:PREVIEW_SIZE],
)

# The table already holds at most PREVIEW_SIZE rows, so it is shown as is
# (a further .head(15) could never trim anything).
display(PandaTable)

# Last expression of the cell: show the AnnData summary.
adata

Unnamed: 0,4-1BB,B7-H3,BCL2,Beta-catenin,CCR7,CD11b,CD11c,CD123,CD127,CD138
c_1_10_1,4.052731,4.541703,2.784624,3.419147,2.550799,4.550657,4.343398,3.056747,2.151397,2.67453
c_1_10_10,3.969106,4.713497,3.521462,4.386992,3.770126,4.317827,3.946294,3.237743,3.164851,3.653503
c_1_10_100,4.682954,5.489165,3.468132,5.127307,2.546219,3.836335,3.372011,3.199389,3.625306,3.770819
c_1_10_1000,3.996042,4.684326,3.658237,4.163608,3.081272,3.542153,4.030199,3.184208,3.582363,3.717344
c_1_10_1001,4.24582,3.890254,5.117325,5.81483,3.241657,3.665941,2.48983,3.566895,4.030626,4.725803
c_1_10_1002,5.027553,5.351153,4.490508,5.573348,3.965264,4.292326,3.978987,4.522672,4.431604,5.272081
c_1_10_1003,3.725352,3.097807,2.919052,4.932837,2.982254,3.070146,2.851584,2.779232,3.725352,3.225678
c_1_10_1004,3.58086,5.29046,4.60019,6.015118,2.673504,3.467295,2.364261,2.920969,4.690662,5.544488
c_1_10_1005,4.288707,4.633535,4.784353,5.74145,2.95191,3.520344,4.362518,3.434145,3.637132,3.708043
c_1_10_1006,5.113582,4.372048,3.99122,5.390831,3.14129,3.510433,5.505113,4.274272,4.314537,4.018119


AnnData object with n_obs × n_vars = 272605 × 62
    obs: 'cell_ID', 'RNA_pca_cluster_default', 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'nCount_negprobes', 'nFeature_negprobes', 'Area', 'AspectRatio', 'Width', 'Height', 'Mean.PanCK', 'Max.PanCK', 'Mean.Yellow', 'Max.Yellow', 'Mean.CD298_B2M', 'Max.CD298_B2M', 'Mean.CD45', 'Max.CD45', 'Mean.DAPI', 'Max.DAPI', 'fov', 'cell_id', 'assay_type', 'dualfiles', 'Run_name', 'Slide_name', 'tissue', 'Alpha', 'slide_ID_numeric', 'Run_Tissue_name', 'Panel', 'Mean.panCK', 'Max.panCK', 'Mean.CD3e', 'Max.CD3e', 'Mean.B2M.ATPase', 'Max.B2M.ATPase', 'Area.um2', 'x_FOV_px', 'y_FOV_px', 'x_slide_mm', 'y_slide_mm', 'area.qc', 'mean.neg', 'negprobe.qc', 'n_high_quant', 'high.express.qc', 'n_low_quant', 'low.express.qc', 'remove_flagged_cells', 'modified.celltyping.round_1', 'modified.celltyping.round_2', 'modified.celltyping.round_3', 'modified.celltyping.cell_type_number', 'modified.celltyping.final_cell_type', 'cell_type', 'RNA_spatialClusteringNeighbo

In [3]:
def get_anndata_value_info(adata, cell_name, gene_name):
    """
    Look up a single (cell, gene) entry of an AnnData object together with
    the annotations attached to that cell and that gene.

    Parameters:
    - adata: AnnData object to query
    - cell_name: str, label of the cell; has to be present in adata.obs_names
    - gene_name: str, label of the gene; has to be present in adata.var_names

    Returns:
    - dict with three keys:
        "expression_value": the entry of X for the requested cell/gene pair
        "cell_metadata": the row of adata.obs for the cell
        "gene_metadata": the row of adata.var for the gene

    Raises:
    - ValueError if the cell label is unknown (checked first) or the gene
      label is unknown
    """
    # Guard clauses: reject unknown labels before touching the matrix.
    cell_is_known = cell_name in adata.obs_names
    if not cell_is_known:
        raise ValueError(f"Cell '{cell_name}' not found in adata.obs_names")
    gene_is_known = gene_name in adata.var_names
    if not gene_is_known:
        raise ValueError(f"Gene '{gene_name}' not found in adata.var_names")

    # Subset by label and take the matrix of that one-cell, one-gene selection.
    selection = adata[cell_name, gene_name].X
    # Containers offering .toarray() are converted before positional indexing.
    dense = selection.toarray() if hasattr(selection, "toarray") else selection

    return {
        "expression_value": dense[0, 0],
        "cell_metadata": adata.obs.loc[cell_name],
        "gene_metadata": adata.var.loc[gene_name],
    }


# Look up one entry of the matrix and report it with its annotations.
result = get_anndata_value_info(adata, cell_name="cell_2", gene_name="gene_3")

# print() joins its two arguments with a single space, which is why the first
# line of each metadata block in the output starts with a blank.
for heading, key in (
    ("Expression value:", "expression_value"),
    ("\nCell metadata:\n", "cell_metadata"),
    ("\nGene metadata:\n", "gene_metadata"),
):
    print(heading, result[key])

Expression value: 0.49687221616408794

Cell metadata:
 condition    A
Name: cell_2, dtype: object

Gene metadata:
 gene_name    G3
Name: gene_3, dtype: object


## Anndata Object Build from Seurat

Attempt to build the AnnData object from data exported from Seurat.

In [22]:
import scanpy as sc
import pandas as pd
import numpy as np

from pathlib import Path

# Folder holding the CSV files exported from Seurat. It is resolved relative
# to the current user's home directory so that no user name is baked into the
# notebook; point DATA_DIR elsewhere if the exports live in a different place.
DATA_DIR = Path.home() / "OneDrive" / "Documents"
SPATIAL_COLUMNS = ["x_FOV_px", "y_FOV_px"]

# Load files
expr_orig = pd.read_csv(DATA_DIR / "expression.csv", index_col=0)
expr = expr_orig.transpose()  # transposed so that its rows become adata's observations
metadata = pd.read_csv(DATA_DIR / "metadata.csv", index_col=0)
umap = pd.read_csv(DATA_DIR / "umap.csv", index_col=0)

XYmetadata = metadata[SPATIAL_COLUMNS]
display(XYmetadata.tail(15))

# The three tables come from separate CSV files, so nothing guarantees that
# their rows are in the same order. Align metadata and UMAP to the expression
# table by cell ID instead of relying on row position; otherwise annotations
# and coordinates could silently end up attached to the wrong cells.
for table_name, table in (("metadata", metadata), ("umap", umap)):
    missing_ids = expr.index.difference(table.index)
    if len(missing_ids) > 0:
        raise ValueError(
            f"{len(missing_ids)} cell IDs from expression.csv are missing in "
            f"{table_name}.csv, e.g. {list(missing_ids[:5])}"
        )
metadata_aligned = metadata.loc[expr.index]
umap_aligned = umap.loc[expr.index]

# Create AnnData object with its annotations attached at construction time.
# NOTE: this rebinds the name `adata`, so any cell that uses `adata` sees
# whichever object was created last.
adata = sc.AnnData(
    X=expr.to_numpy(),
    obs=metadata_aligned,
    var=pd.DataFrame(index=expr.columns),
)

# Add spatial coordinates and UMAP to .obsm
# NOTE(review): x_FOV_px / y_FOV_px look like per-field-of-view pixel
# coordinates; if cells from several FOVs are plotted together, slide-level
# columns such as x_slide_mm / y_slide_mm may be the better choice - confirm.
adata.obsm["spatial"] = metadata_aligned[SPATIAL_COLUMNS].to_numpy()
adata.obsm["X_umap"] = umap_aligned.to_numpy()

# Optional: persist the assembled object (uncomment to write the file)
# adata.write("spatial_data.h5ad")

Unnamed: 0,x_FOV_px,y_FOV_px
c_2_23_636,3642,1869
c_2_23_658,3392,1898
c_2_23_702,1327,1949
c_2_23_727,3080,1983
c_2_23_756,3880,2002
c_2_23_771,3558,2007
c_2_23_796,3831,2028
c_2_23_811,3556,2037
c_2_23_826,4182,2062
c_2_23_84,3947,560
