### Example: creating a downsampled Leiden clustering plot
The treatment group is downsampled to match the number of cells in the control group by randomly selecting that same number of cells from the treatment group.

In [27]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy

In [22]:
# Directory that holds the input .h5ad file and receives the output figure.
path = "/my/path"
# Join with os.path.join so the file name is separated from the directory
# whether or not `path` ends with a slash (plain `+` would glue them together).
adata = scanpy.read_h5ad(os.path.join(path, "adata_w_leiden_groups.h5ad"))
print(adata)

AnnData object with n_obs × n_vars = 81568 × 8945
    obs: 'sample_id', 'condition', 'batch', 'doublet', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'n_genes', 'leiden', 'merged leiden'
    var: 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'batch_colors', 'hvg', 'leiden', 'leiden_colors', 'log1p', 'neighbors', 'pca', 'sample_id_colors', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts'
    obsp: 'connectivities', 'distances'


  utils.warn_names_duplicates("obs")


In [23]:
# Keep only the cells whose `condition` label is exactly "Control".
control_adata = adata[adata.obs["condition"] == "Control"]
print(control_adata)

View of AnnData object with n_obs × n_vars = 9454 × 8945
    obs: 'sample_id', 'condition', 'batch', 'doublet', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'n_genes', 'leiden', 'merged leiden'
    var: 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'batch_colors', 'hvg', 'leiden', 'leiden_colors', 'log1p', 'neighbors', 'pca', 'sample_id_colors', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts'
    obsp: 'connectivities', 'distances'


In [24]:
# Everything that is not labelled "Control" is treated as treatment.
treatment_adata = adata[adata.obs["condition"] != "Control"]
print(treatment_adata)

View of AnnData object with n_obs × n_vars = 72114 × 8945
    obs: 'sample_id', 'condition', 'batch', 'doublet', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'n_genes', 'leiden', 'merged leiden'
    var: 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'batch_colors', 'hvg', 'leiden', 'leiden_colors', 'log1p', 'neighbors', 'pca', 'sample_id_colors', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts'
    obsp: 'connectivities', 'distances'


In [25]:
# Number of control cells; the treatment group is downsampled to this size.
num_samples = control_adata.n_obs
print(num_samples)

9454


In [26]:
def random_sample_adata(adata, n_samples, sample_size, seed=None):
    """Return a copy of ``adata`` restricted to randomly chosen observations.

    ``n_samples * sample_size`` observation positions are selected in total.
    When ``adata`` holds at least that many observations they are drawn in a
    single pass without replacement, so no observation appears twice in the
    result. Drawing every batch independently would let the same observation
    be picked by several batches, which silently duplicates cells.

    If more observations are requested than ``adata`` contains, repeats are
    unavoidable; in that case each batch of ``sample_size`` is drawn without
    replacement on its own and the batches are concatenated.

    Args:
        adata: Object exposing ``n_obs`` and supporting indexing by an array
            of integer positions whose result has a ``copy()`` method
            (e.g. an AnnData object or a view of one).
        n_samples: Number of batches to draw.
        sample_size: Number of observations in each batch.
        seed: Optional seed for a private random generator. NumPy's global
            random state is left untouched.

    Returns:
        A copy of ``adata`` indexed by the selected observation positions.

    Raises:
        ValueError: If a batch of ``sample_size`` observations cannot be
            drawn without replacement from ``adata.n_obs`` observations.
    """
    # A local generator keeps the call reproducible without reseeding the
    # process-wide NumPy RNG that other code may rely on.
    rng = np.random.default_rng(seed)

    total = n_samples * sample_size
    if total <= adata.n_obs:
        # One draw without replacement guarantees unique observations.
        sampled_indices = rng.choice(adata.n_obs, size=total, replace=False)
    else:
        # Not enough observations for a duplicate-free result: fall back to
        # independent batches (unique within a batch, possibly repeated
        # across batches).
        sampled_indices = np.concatenate(
            [
                rng.choice(adata.n_obs, size=sample_size, replace=False)
                for _ in range(n_samples)
            ]
        )

    # Materialise the selection as an independent object rather than a view.
    return adata[sampled_indices].copy()
# Draw `num_samples` single-cell samples from the treatment subset so that it
# has the same number of cells as the control subset; the fixed seed makes the
# selection reproducible.
treatment_adata_sampled = random_sample_adata(treatment_adata, n_samples=num_samples, sample_size=1, seed=42)
print(treatment_adata_sampled)




AnnData object with n_obs × n_vars = 9454 × 8945
    obs: 'sample_id', 'condition', 'batch', 'doublet', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'n_genes', 'leiden', 'merged leiden'
    var: 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'batch_colors', 'hvg', 'leiden', 'leiden_colors', 'log1p', 'neighbors', 'pca', 'sample_id_colors', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts'
    obsp: 'connectivities', 'distances'


In [30]:
# Render with the non-interactive Agg backend: the figure is only written to
# disk, never shown in a window.
plt.switch_backend('agg')

# rc_context() restores matplotlib's rcParams when the block exits.
with plt.rc_context():
    fig, ax = plt.subplots(figsize=(10, 7))
    # Pass the axes to scanpy and disable its own show() call; otherwise
    # scanpy draws on a new figure, the requested figsize is ignored and the
    # empty figure created above is left open.
    scanpy.pl.umap(
        treatment_adata_sampled,
        color='merged leiden',
        size=3,
        legend_loc='on data',
        ax=ax,
        show=False,
    )
    ax.set_title("Treatment")
    # os.path.join inserts the separator between directory and file name
    # whether or not `path` ends with a slash.
    fig.savefig(
        os.path.join(path, "UMAP_leiden_Treatment_automated_names_labeled_on_plot_downsampled.png"),
        bbox_inches='tight',
    )
    # Release the figure so repeated runs do not accumulate open figures.
    plt.close(fig)