In [1]:
# 1. Install Scanpy (run this cell only if Scanpy is not already installed).
#    Prefer %pip over !pip so the package is installed into the running kernel's environment.
# %pip install scanpy

In [2]:
# 2. Import necessary libraries
import scanpy as sc
import numpy as np
import pandas as pd

In [3]:
# 3. Create a synthetic dataset: 100 cells × 500 genes
#    - X: random Poisson counts (rows = cells, columns = genes)
#    - obs: cell metadata (e.g., percent_mito)
#    - var: gene metadata (e.g., a "highly_variable" flag)
# All draws come from one seeded generator so that Restart & Run All
# reproduces exactly the same data; change SEED to get a different sample.
SEED = 0
N_CELLS, N_GENES = 100, 500
rng = np.random.default_rng(SEED)

X = rng.poisson(lam=1.0, size=(N_CELLS, N_GENES))
obs = pd.DataFrame({
    'cell_id': [f'cell_{i}' for i in range(N_CELLS)],
    'percent_mito': rng.random(N_CELLS) * 10,  # uniform values in [0, 10)
})
var = pd.DataFrame({
    'gene_id': [f'gene_{i}' for i in range(N_GENES)],
    'highly_variable': rng.choice([True, False], size=N_GENES),
})

In [4]:
# 4. Set indices for cell and gene metadata
#    Reassign rather than mutate in place, and skip the call when the index
#    already carries the expected name, so re-running this cell is a no-op
#    instead of raising KeyError on the already-consumed column.
if obs.index.name != 'cell_id':
    obs = obs.set_index('cell_id')
if var.index.name != 'gene_id':
    var = var.set_index('gene_id')

In [5]:
# 5. Assemble the AnnData container from the matrix X together with the
#    per-cell (obs) and per-gene (var) annotation tables
adata = sc.AnnData(
    X=X,
    obs=obs,
    var=var,
)

In [6]:
# Bare final expression: the notebook renders the object's summary as the cell output
adata

AnnData object with n_obs × n_vars = 100 × 500
    obs: 'percent_mito'
    var: 'highly_variable'