In [1]:
%load_ext autoreload
%autoreload 2

import logging

import anndata as ad
import numpy as np
import pandas as pd

from alphatools import pp

logging.basicConfig(level=logging.INFO)

### Test the data module

Evaluate adding and modifying data and metadata.

In [2]:
# Fixed seed so the random metadata values (and every output derived from
# them) are reproducible after Restart Kernel -> Run All.
SEED = 42
rng = np.random.default_rng(SEED)

# Shared labels: defined once so data and metadata frames cannot drift apart.
sample_ids = [f"cell{i}" for i in range(1, 7)]
feature_ids = [f"gene{i}" for i in range(1, 6)]

# Data matrix, 6 samples x 5 features; every value in row i equals i + 1.
df = pd.DataFrame(
    np.tile(np.arange(1, len(sample_ids) + 1)[:, None], (1, len(feature_ids))),
    index=sample_ids,
    columns=feature_ids,
)

# Sample metadata covering all six samples.
smd = pd.DataFrame(rng.random((6, 2)), index=sample_ids, columns=["par1", "par2"])

# Second sample metadata table: "cell4" is missing, and "par1" overlaps with smd.
smd2 = pd.DataFrame(
    rng.random((5, 2)),
    index=[s for s in sample_ids if s != "cell4"],
    columns=["par1", "par3"],
)

# Feature metadata covering all five features.
fmd = pd.DataFrame(rng.random((5, 2)), index=feature_ids, columns=["cat1", "cat2"])

# Second feature metadata table: "gene4" is missing, and "cat1" overlaps with fmd.
fmd2 = pd.DataFrame(
    rng.random((4, 2)),
    index=[f for f in feature_ids if f != "gene4"],
    columns=["cat1", "cat3"],
)

In [3]:
# Reference object assembled with the plain anndata API.
adata_standard = ad.AnnData(df)
adata_standard.obs = smd
adata_standard.var = fmd
print(adata_standard)

# Same three frames passed through the alphatools helper.
adata_alphatools = pp.to_anndata(df, smd, fmd)
print(adata_alphatools)

# Assert equality of the two anndata objects. Real assertions are used
# (instead of printing on mismatch) so that a regression stops
# "Restart & Run All" rather than passing silently; the differing objects
# are included in the failure message.
assert adata_standard.obs.equals(adata_alphatools.obs), (
    f"obs differ:\n{adata_standard.obs}\n{adata_alphatools.obs}"
)
assert adata_standard.var.equals(adata_alphatools.var), (
    f"var differ:\n{adata_standard.var}\n{adata_alphatools.var}"
)
# np.array_equal also reports a shape mismatch as inequality instead of
# broadcasting or raising.
assert np.array_equal(adata_standard.X, adata_alphatools.X), (
    f"X differs:\n{adata_standard.X}\n{adata_alphatools.X}"
)

AnnData object with n_obs × n_vars = 6 × 5
    obs: 'par1', 'par2'
    var: 'cat1', 'cat2'
AnnData object with n_obs × n_vars = 6 × 5
    obs: 'par1', 'par2'
    var: 'cat1', 'cat2'


In [4]:
# 1) Sample metadata (axis=0), replacing whatever obs held before.
adata_alphatools = pp.add_metadata(
    adata_alphatools,
    smd,
    axis=0,
    keep_data_shape=False,
    keep_existing_metadata=False,
)

# 2) Sample metadata (axis=0), appended to the existing obs columns.
#    In the recorded run this is logged as a left join: the duplicate "par1"
#    is dropped from the incoming table and the data shape stays (6, 5).
adata_alphatools = pp.add_metadata(
    adata_alphatools,
    smd2,
    axis=0,
    keep_data_shape=True,
    keep_existing_metadata=True,
)

# 3) Feature metadata (axis=1), replacing whatever var held before.
adata_alphatools = pp.add_metadata(
    adata_alphatools,
    fmd,
    axis=1,
    keep_data_shape=False,
    keep_existing_metadata=False,
)

# 4) Feature metadata (axis=1), appended to the existing var columns.
#    In the recorded run this is logged as an inner join: the duplicate "cat1"
#    is dropped and the data shrinks from (6, 5) to (6, 4).
adata_alphatools = pp.add_metadata(
    adata_alphatools,
    fmd2,
    axis=1,
    keep_data_shape=False,
    keep_existing_metadata=True,
)

INFO:root:pp.add_metadata(): Data (6, 5) to (6, 5); obs (6, 2) to (6, 2); var (5, 2) to (5, 2) 

INFO:root:pp.add_metadata(): Synonymous fields, dropping ['par1'] from incoming metadata.
INFO:root:pp.add_metadata(): Join incoming to existing metadata via left join on axis  0.
INFO:root:pp.add_metadata(): Data (6, 5) to (6, 5); obs (6, 2) to (6, 3); var (5, 2) to (5, 2) 



INFO:root:pp.add_metadata(): Data (6, 5) to (6, 5); obs (6, 3) to (6, 3); var (5, 2) to (5, 2) 

INFO:root:pp.add_metadata(): Synonymous fields, dropping ['cat1'] from incoming metadata.
INFO:root:pp.add_metadata(): Join incoming to existing metadata via inner join on axis  1.
INFO:root:pp.add_metadata(): Data (6, 5) to (6, 4); obs (6, 3) to (6, 3); var (5, 2) to (4, 3) 



In [5]:
# Inspect the merged sample metadata. "cell4" is not part of smd2's index,
# so its "par3" entry is expected to be missing.
adata_alphatools.obs

Unnamed: 0,par1,par2,par3
cell1,0.090026,0.999942,0.711686
cell2,0.785069,0.452904,0.129063
cell3,0.258271,0.135915,0.673408
cell4,0.392679,0.363576,
cell5,0.585397,0.928658,0.50686
cell6,0.486908,0.652778,0.157426


### Evaluate standard AnnData view/copy behavior

In [28]:
# Build a plain AnnData object from the test frames: df as the data matrix,
# smd as sample metadata (obs) and fmd as feature metadata (var).
adata = ad.AnnData(df)
adata.obs = smd
adata.var = fmd

In [29]:
# Subset the first five samples and two features in two ways: a plain
# subscript (labelled a view here) and the same subscript followed by
# .copy() (an independent object).
# A bare `adata[...]` expression that preceded these lines was dropped: it was
# not the last statement of the cell, so it displayed nothing and its result
# was discarded.
adata_subset = adata[:5, ["gene1", "gene2"]]  # view
adata_subset_copy = adata[:5, ["gene1", "gene2"]].copy()  # copy

In [30]:
# Item assignment on the copied subset: set "gene1" of the first two samples
# to 0, then show the resulting data matrix.
adata_subset_copy[:2, "gene1"] = 0  # change inplace
adata_subset_copy.X

array([[0, 1],
       [0, 2],
       [3, 3],
       [4, 4],
       [5, 5]])

In [31]:
# Item assignment on a view is expected to fail. The error is the point of
# this cell, so it is caught and printed: the message stays visible while the
# notebook still runs top to bottom under "Restart & Run All".
try:
    adata_subset[:2, "gene1"] = 0  # change inplace, fails because [] assignment does not work on view
except ValueError as err:
    print(f"ValueError: {err}")
else:
    # Make it obvious if the expected failure did not occur.
    print("No ValueError raised: item assignment on adata_subset succeeded.")

ValueError: Object is view and cannot be accessed with `[]`.

In [38]:
# Adding an obs column to the view succeeds and detaches it from its parent.
adata_subset.obs["status"] = "test"  # works, changes adata_subset to its own object
# same for any change of obs/var

# now that adata_subset is its own object rather than a view,
adata_subset[:2, "gene1"] = 0  # item assignment works as well

In [43]:
# Store the element-wise square of X as an additional layer named "test".
adata.layers["test"] = adata.X**2  # works

In [49]:
# Show the "test" layer followed by the data matrix X; layers can be accessed
# individually by key.
display(adata.layers["test"], adata.X)

array([[ 1,  1,  1,  1,  1],
       [ 4,  4,  4,  4,  4],
       [ 9,  9,  9,  9,  9],
       [16, 16, 16, 16, 16],
       [25, 25, 25, 25, 25],
       [36, 36, 36, 36, 36]])

array([[1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2],
       [3, 3, 3, 3, 3],
       [4, 4, 4, 4, 4],
       [5, 5, 5, 5, 5],
       [6, 6, 6, 6, 6]])