In [8]:
import numpy as np
import pandas as pd
import re
import pysam
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord


In [6]:
import anndata as ad
from scipy.sparse import csr_matrix

In [14]:
# Build a toy count matrix of Poisson(1) draws with shape (100, 2000).
# Seeding NumPy's global generator first makes this cell, and the later cells
# that also draw from np.random.*, reproducible when the notebook is run
# top to bottom on a fresh kernel.
np.random.seed(0)
m = np.random.poisson(1, size=(100, 2000))
# Store the counts as a sparse float32 matrix before wrapping them in AnnData.
counts = csr_matrix(m, dtype=np.float32)
adata = ad.AnnData(counts)
print(adata)

AnnData object with n_obs × n_vars = 100 × 2000


In [16]:
# Label the two axes with readable names: "Cell_<i>" for each observation and
# "Gene_<i>" for each variable, so later cells can index by label.
adata.obs_names = [f"Cell_{i:d}" for i in range(adata.n_obs)]
adata.var_names = [f"Gene_{i:d}" for i in range(adata.n_vars)]
# Print the first ten observation names as a quick sanity check.
print(adata.obs_names[:10])

Index(['Cell_0', 'Cell_1', 'Cell_2', 'Cell_3', 'Cell_4', 'Cell_5', 'Cell_6',
       'Cell_7', 'Cell_8', 'Cell_9'],
      dtype='object')


In [17]:
# Subset by label: the first list selects observations, the second selects
# variables. The result is displayed as the cell's output.
adata[
    ["Cell_1", "Cell_10"],
    ["Gene_5", "Gene_1900"],
]

View of AnnData object with n_obs × n_vars = 2 × 2

In [18]:
# Draw one random label per observation from the three candidate cell types.
ct = np.random.choice(
    ["B", "T", "Monocyte"],
    size=adata.n_obs,
)
# Store the labels as a Categorical; categoricals are preferred for efficiency.
adata.obs["cell_type"] = pd.Categorical(ct)
# Display the observation-level annotation table.
adata.obs

Unnamed: 0,cell_type
Cell_0,Monocyte
Cell_1,T
Cell_2,Monocyte
Cell_3,T
Cell_4,T
...,...
Cell_95,T
Cell_96,B
Cell_97,T
Cell_98,T


In [23]:
# Attach multi-dimensional annotations made of standard-normal noise:
# a 2-column array per observation and a 5-column array per variable.
adata.obsm["X2_umap"] = np.random.normal(
    loc=0, scale=1, size=(adata.n_obs, 2)
)
adata.varm["gene_stuff"] = np.random.normal(
    loc=0, scale=1, size=(adata.n_vars, 5)
)
# NOTE(review): the recorded output also lists an "X_umap" key that no visible
# cell creates -- possibly leftover kernel state from an earlier run; confirm
# with Restart & Run All.
adata.obsm

AxisArrays with keys: X_umap, X2_umap

In [24]:
# Display the object's summary: its dimensions plus the annotation keys
# (obs, obsm, varm) that have been attached so far.
adata

AnnData object with n_obs × n_vars = 100 × 2000
    obs: 'cell_type'
    obsm: 'X_umap', 'X2_umap'
    varm: 'gene_stuff'

In [26]:
# Store a log1p-transformed version of X as a named layer alongside X.
adata.layers["log_transformed"] = np.log1p(adata.X)
# Render that layer as a DataFrame; this is the cell's displayed output.
adata.to_df(layer="log_transformed")

Unnamed: 0,Gene_0,Gene_1,Gene_2,Gene_3,Gene_4,Gene_5,Gene_6,Gene_7,Gene_8,Gene_9,...,Gene_1990,Gene_1991,Gene_1992,Gene_1993,Gene_1994,Gene_1995,Gene_1996,Gene_1997,Gene_1998,Gene_1999
Cell_0,0.693147,0.693147,1.098612,0.000000,0.000000,1.098612,1.098612,0.693147,0.693147,0.000000,...,0.693147,0.000000,0.000000,0.693147,0.693147,1.098612,0.693147,0.693147,0.693147,0.000000
Cell_1,0.693147,0.693147,1.098612,0.693147,1.098612,0.693147,1.386294,1.098612,1.098612,1.098612,...,0.693147,1.098612,0.693147,0.693147,0.000000,0.693147,1.098612,1.386294,0.693147,0.693147
Cell_2,1.098612,0.000000,1.098612,0.693147,0.000000,0.693147,1.098612,0.000000,0.000000,0.693147,...,0.000000,0.693147,1.098612,1.098612,0.000000,0.693147,0.693147,0.000000,0.000000,0.693147
Cell_3,1.098612,0.693147,0.000000,1.098612,0.693147,1.098612,1.098612,0.693147,0.693147,1.098612,...,0.693147,1.609438,0.000000,0.693147,0.000000,0.000000,0.693147,0.000000,0.000000,0.000000
Cell_4,0.000000,0.693147,0.693147,1.386294,0.693147,1.098612,0.693147,0.000000,0.000000,0.000000,...,0.693147,1.098612,0.000000,0.000000,0.000000,0.000000,0.693147,0.000000,1.098612,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Cell_95,0.000000,0.693147,0.000000,0.693147,1.098612,1.386294,0.693147,0.693147,0.693147,1.098612,...,0.000000,0.693147,0.000000,0.000000,0.000000,0.693147,0.693147,0.693147,0.693147,0.000000
Cell_96,0.000000,1.386294,1.098612,0.000000,0.000000,1.098612,1.098612,1.098612,0.000000,1.609438,...,0.693147,1.386294,1.098612,1.098612,0.693147,0.693147,0.693147,0.000000,0.000000,1.098612
Cell_97,1.098612,0.693147,0.693147,0.000000,1.098612,1.098612,0.000000,1.386294,0.000000,0.000000,...,1.098612,0.000000,0.693147,1.386294,0.693147,1.098612,1.098612,0.000000,0.000000,0.000000
Cell_98,0.000000,1.098612,0.000000,0.693147,0.000000,0.000000,0.693147,1.098612,0.693147,0.000000,...,1.098612,0.693147,1.098612,0.693147,0.693147,1.386294,0.693147,1.098612,1.098612,0.693147
