In [50]:
import numpy as np
import pandas as pd
import anndata as ad
import scanpy as sc
import scipy
from scipy.sparse import csr_matrix
print(ad.__version__)

0.10.3


## basic

In [11]:
# create anndata
n_obs, n_var = 10, 20
m = csr_matrix(np.random.poisson(1, size=(n_obs, n_var)), dtype=np.float32)
a = ad.AnnData(m)
a.obs_names = [f'cell_{i}' for i in range(n_obs)]
a.var_names = [f'gene_{i}' for i in range(n_var)]
a

AnnData object with n_obs × n_vars = 10 × 20

In [18]:
# add metadata
a.obs['cell_type'] = np.random.choice(['B', 'T', 'Monocyte'], size=(n_obs,))
a.obsm['umap'] = np.random.normal(0,1, size=(n_obs, 2))
a.varm['gene_stuff'] = np.random.normal(0,1, size=(n_var, 2))
a.un= {'fa_file':'/mnt/raw_data/a.fa'}
a

AnnData object with n_obs × n_vars = 10 × 20
    obs: 'cell_type'
    obsm: 'umap'
    varm: 'gene_stuff'

In [22]:
# natural log of (1+X)
a.layers['log10'] = np.log1p(a.X)

In [25]:
# to data frame
a.to_df(layer='log10')
a.to_df()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,0.0,1.0,0.0,0.0,1.0,1.0,2.0,1.0,2.0,0.0,0.0,1.0,2.0,2.0,1.0,0.0,2.0,1.0,0.0,1.0
1,0.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,1.0,4.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0
2,1.0,2.0,1.0,0.0,2.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,2.0,1.0,0.0,0.0,0.0,1.0,1.0
3,0.0,2.0,0.0,1.0,1.0,2.0,1.0,0.0,0.0,2.0,2.0,0.0,1.0,2.0,1.0,0.0,1.0,1.0,0.0,2.0
4,0.0,2.0,1.0,0.0,2.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0
5,1.0,1.0,1.0,0.0,0.0,2.0,1.0,2.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
6,2.0,1.0,2.0,0.0,2.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0
7,2.0,1.0,1.0,0.0,1.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0
8,0.0,1.0,4.0,0.0,2.0,2.0,1.0,0.0,2.0,0.0,1.0,3.0,0.0,0.0,0.0,1.0,0.0,2.0,1.0,1.0
9,1.0,2.0,1.0,2.0,5.0,0.0,1.0,2.0,0.0,1.0,0.0,2.0,1.0,3.0,1.0,1.0,1.0,0.0,1.0,2.0


In [26]:
# write
a.write('anndata.h5ad')

## merge/concatenate

In [32]:
pbmc = sc.datasets.pbmc68k_reduced()
pbmc

AnnData object with n_obs × n_vars = 700 × 765
    obs: 'bulk_labels', 'n_genes', 'percent_mito', 'n_counts', 'S_score', 'G2M_score', 'phase', 'louvain'
    var: 'n_counts', 'means', 'dispersions', 'dispersions_norm', 'highly_variable'
    uns: 'bulk_labels_colors', 'louvain', 'louvain_colors', 'neighbors', 'pca', 'rank_genes_groups'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'

In [40]:
groups = pbmc.obs.groupby("louvain", observed=True).indices
pbmc_concat = ad.concat([pbmc[inds] for inds in groups.values()], merge="same")
assert np.array_equal(pbmc.X, pbmc_concat[pbmc.obs_names].X)
pbmc_concat

AnnData object with n_obs × n_vars = 700 × 765
    obs: 'bulk_labels', 'n_genes', 'percent_mito', 'n_counts', 'S_score', 'G2M_score', 'phase', 'louvain'
    var: 'n_counts', 'means', 'dispersions', 'dispersions_norm', 'highly_variable'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'

In [172]:
x = ad.AnnData(pd.DataFrame(None, dtype='float', columns=list('ABC')))
print(x)
vars = {'gene': ['g1','g2',' g3'], 'start':[2,45,6]}
a = ad.AnnData(np.eye(3), var=vars, obs=pd.DataFrame(index=list('ABC')))
b = ad.AnnData(pd.DataFrame([1,2,3]).transpose(), var=pd.DataFrame(index=list("bcd")), obs=pd.DataFrame(index=list('D')))
print('##A\n', a.X)
print('##B\n', b.X)
r = ad.concat([x,a, b], axis=0, join='outer', fill_value=0).X
print('##\n', r)


AnnData object with n_obs × n_vars = 0 × 3
##A
 [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
##B
 [[1 2 3]]
##
 [[1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 2. 3.]]




In [196]:
x = ad.AnnData(pd.DataFrame(None, dtype='float', columns=list('bcd')))
print(x)
b = ad.AnnData(pd.DataFrame([1,2,3]).transpose(), var=pd.DataFrame(index=list("bcd")), obs=pd.DataFrame(index=list('D')))
c = ad.AnnData(pd.DataFrame([1,2]).transpose(), var=pd.DataFrame(index=list("cd")), obs=pd.DataFrame(index=list('A')))
d = ad.AnnData(pd.DataFrame([1,2,3,4]).transpose(), var=pd.DataFrame(index=list("abcd")), obs=pd.DataFrame(index=list('B')))
print('##B\n', b.X)
r = ad.concat([x, b, c,d], join='outer', fill_value=0)
print('##\n', r.var, r.obs)
print(r.X)

ValueError: no types given