# Notebook for Annotation of clusters after `HTODemux` - from markers to clusters after removal batch effects using `BBKNNN`

**Created by :** Srivalli Kolla

**Created on :** 22 April, 2025

**Modified on :** 10 May, 2025

**University of Würzburg**

Env : scanpy (Python 3.12.2)

# Importing Packages

In [93]:
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sb
import bbknn
import datetime
import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib import rcParams

In [94]:
sc.settings.verbosity = 3
sc.logging.print_versions()

plt.rcParams['figure.dpi'] = 300  
plt.rcParams['savefig.dpi'] = 300

timestamp = datetime.datetime.now().strftime("%d_%m_%y")

-----
anndata     0.11.3
scanpy      1.10.4
-----
Cython                      3.0.12
PIL                         11.1.0
annoy                       NA
anyio                       NA
arrow                       1.3.0
asttokens                   NA
attr                        25.1.0
attrs                       25.1.0
babel                       2.17.0
bbknn                       1.6.0
certifi                     2025.01.31
cffi                        1.17.1
charset_normalizer          3.4.1
colorama                    0.4.6
comm                        0.2.2
cycler                      0.12.1
cython                      3.0.12
cython_runtime              NA
dateutil                    2.9.0.post0
debugpy                     1.8.12
decorator                   5.2.1
defusedxml                  0.7.1
executing                   2.1.0
fastjsonschema              NA
fqdn                        NA
h5py                        3.13.0
idna                        3.10
igraph                      0.

  mod_version = _find_version(mod.__version__)


# Data import

In [95]:
adata = sc.read_h5ad('./Github/Nuclear_hashing_2025/data/filtered_scrublet_doublets_after_annotation_HTODemux_11_04_25.h5ad')
adata

AnnData object with n_obs × n_vars = 11900 × 32285
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'nCount_HTO', 'nFeature_HTO', 'HTO_maxID', 'HTO_secondID', 'HTO_margin', 'HTO_classification', 'HTO_classification.global', 'hash.ID', 'ident', 'Sample_given', 'Sample-ID', 'Mouse-ID', 'Sex', 'Group', 'Nuclei Purification Method after Hashing', 'assigned_hashtag', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden_0.2', 'leiden_0.5', 'cell_type', 'doublet_scores', 'predicted_doublets'
    var: 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
    uns: 'Group_colors', 'HTO_classification_colors', 'Nuclei Purification Method after Hashing_colors', 'Sex_colors', 'X_name', 'assigned_hashtag_colors', 'cell_type_colors', 'dea_ranking', 'dendrogram_leiden_0.2', 'leiden_0.2', 'leiden_0.2_colors', 'leiden_0.5', 'leiden_0.5_colors', 'log1p', 'neighbors', 'pca', 'umap'
    obsm: 'X_pca'

In [96]:
adata.obs

Unnamed: 0,orig.ident,nCount_RNA,nFeature_RNA,nCount_HTO,nFeature_HTO,HTO_maxID,HTO_secondID,HTO_margin,HTO_classification,HTO_classification.global,...,total_counts,total_counts_mt,pct_counts_mt,total_counts_ribo,pct_counts_ribo,leiden_0.2,leiden_0.5,cell_type,doublet_scores,predicted_doublets
AAACCAAAGGCGTCCA-1,SeuratProject,7966.0,2771,1928.0,8,TotalSeqB4,TotalSeqB9,0.377318,TotalSeqB4,Singlet,...,7966.0,418.0,5.247301,77.0,0.966608,0,5,Cardiomyocytes,0.098497,0
AAACCAAAGTTAGGCC-1,SeuratProject,16912.0,4274,2176.0,8,TotalSeqB4,TotalSeqB9,0.664448,TotalSeqB4,Singlet,...,16912.0,380.0,2.246925,86.0,0.508515,0,0,Cardiomyocytes,0.169903,0
AAACCATTCCATCGAA-1,SeuratProject,5731.0,1707,2045.0,8,TotalSeqB8,TotalSeqB6,0.597086,TotalSeqB8,Singlet,...,5731.0,608.0,10.608969,51.0,0.889897,3,5,Ventricular Cardiomyocytes,0.131833,0
AAACCATTCCCATGAA-1,SeuratProject,12396.0,3906,1374.0,8,TotalSeqB4,TotalSeqB6,0.658417,TotalSeqB4,Singlet,...,12396.0,315.0,2.541142,81.0,0.653437,2,2,Pacemaker Cells,0.061069,0
AAACCATTCGAATTAC-1,SeuratProject,19392.0,4854,1785.0,8,TotalSeqB4,TotalSeqB5,0.605797,TotalSeqB4,Singlet,...,19392.0,357.0,1.840965,81.0,0.417698,2,2,Pacemaker Cells,0.061069,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TGTGTTAGTCATCCTC-1,SeuratProject,5462.0,2470,2059.0,8,TotalSeqB5,TotalSeqB8,0.069448,TotalSeqB5,Singlet,...,5462.0,300.0,5.492494,59.0,1.080190,6,8,Immune Cells,0.029412,0
TGTGTTAGTTACACCG-1,SeuratProject,11820.0,3483,2200.0,8,TotalSeqB3,TotalSeqB8,0.147143,TotalSeqB3,Singlet,...,11820.0,181.0,1.531303,63.0,0.532995,0,0,Cardiomyocytes,0.159533,0
TGTGTTGAGCAAGGCA-1,SeuratProject,15725.0,4708,1760.0,8,TotalSeqB7,TotalSeqB6,0.533907,TotalSeqB7,Singlet,...,15725.0,366.0,2.327504,174.0,1.106518,2,2,Pacemaker Cells,0.164286,0
TGTGTTGAGTACGCAC-1,SeuratProject,5921.0,2736,2001.0,8,TotalSeqB4,TotalSeqB8,0.096781,TotalSeqB4,Singlet,...,5921.0,213.0,3.597365,58.0,0.979564,1,1,Fibroblasts,0.030246,0


#### Check if data is raw or Normalized

In [97]:
def X_is_raw(adata):
    return np.array_equal(adata.X.sum(axis=0).astype(int), adata.X.sum(axis=0))

In [98]:
print(X_is_raw(adata))

False


In [99]:
adata.X= adata.layers['raw_counts']
print(X_is_raw(adata))

True


In [100]:
sc.pp.highly_variable_genes(
    adata,
    flavor = "seurat_v3",
    n_top_genes = 8000,
    layer = "raw_counts",
    batch_key = 'assigned_hashtag',
    subset = False
)
adata

extracting highly variable genes
--> added
    'highly_variable', boolean vector (adata.var)
    'highly_variable_rank', float vector (adata.var)
    'means', float vector (adata.var)
    'variances', float vector (adata.var)
    'variances_norm', float vector (adata.var)


AnnData object with n_obs × n_vars = 11900 × 32285
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'nCount_HTO', 'nFeature_HTO', 'HTO_maxID', 'HTO_secondID', 'HTO_margin', 'HTO_classification', 'HTO_classification.global', 'hash.ID', 'ident', 'Sample_given', 'Sample-ID', 'Mouse-ID', 'Sex', 'Group', 'Nuclei Purification Method after Hashing', 'assigned_hashtag', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden_0.2', 'leiden_0.5', 'cell_type', 'doublet_scores', 'predicted_doublets'
    var: 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'highly_variable_nbatches'
    uns: 'Group_colors', 'HTO_classification_colors', 'Nuclei Purification Method after Hashing_colors', 'Sex_colors', 'X_name', 'assigned_hashtag_colors', 'cell_type_colors', 'dea_ranking', 'dendrogram_leiden_0.2', 'leiden_0.2'

In [101]:
adata.X= adata.layers['cpm_normalization']
print(X_is_raw(adata))

False


# Data Overview 

## Batch effects removal

In [102]:
bbknn = bbknn.bbknn(adata, batch_key='assigned_hashtag', neighbors_within_batch = 4, approx = True, copy = True)
bbknn

computing batch balanced neighbors


	finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:01)


AnnData object with n_obs × n_vars = 11900 × 32285
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'nCount_HTO', 'nFeature_HTO', 'HTO_maxID', 'HTO_secondID', 'HTO_margin', 'HTO_classification', 'HTO_classification.global', 'hash.ID', 'ident', 'Sample_given', 'Sample-ID', 'Mouse-ID', 'Sex', 'Group', 'Nuclei Purification Method after Hashing', 'assigned_hashtag', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden_0.2', 'leiden_0.5', 'cell_type', 'doublet_scores', 'predicted_doublets'
    var: 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'highly_variable_nbatches'
    uns: 'Group_colors', 'HTO_classification_colors', 'Nuclei Purification Method after Hashing_colors', 'Sex_colors', 'X_name', 'assigned_hashtag_colors', 'cell_type_colors', 'dea_ranking', 'dendrogram_leiden_0.2', 'leiden_0.2'

## Clustering

In [103]:
sc.tl.pca(bbknn, n_comps=50, use_highly_variable=True)
sc.pp.neighbors(bbknn)

computing PCA
    with n_comps=50




    finished (0:00:06)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:01)


In [104]:
sc.tl.umap(bbknn)

computing UMAP


    finished: added
    'X_umap', UMAP coordinates (adata.obsm)
    'umap', UMAP parameters (adata.uns) (0:00:04)


## Data Visualization

In [16]:
features = [
    'Sex', 'Group', 'HTO_classification', 'assigned_hashtag',
    'Nuclei Purification Method after Hashing', 'total_counts',
    'n_genes_by_counts', 'pct_counts_mt', 'pct_counts_ribo', 'doublet_scores'
]

for feature in features:
    ax = sc.pl.umap(
        bbknn,
        color=feature,
        frameon=False,
        layer='cpm_normalization',
        cmap='RdYlBu_r',
        show=False, legend_loc='right margin' 
    )
    
    fig = plt.gcf()

    plt.tight_layout(rect=[0, 0, 0.85, 1])  
    
    safe_name = feature.replace(" ", "_").replace("/", "_")
    plt.savefig(
        f"./Github/Nuclear_hashing_2025/figs_bbknn_batch_effect_removed/umap_{safe_name}.png",
        dpi=300,
        transparent=True,
        facecolor='none',
        edgecolor='none'
    )
    plt.close()

# Annotation - Overall

## Marker Genes

In [106]:
marker_genes = {'Ventricular Cardiomyocytes' : ['Myh7',' Myl2',' Fhl2'],
 'Atrial Cardiomyocytes' :['Nppa',' Myl7',' Myl4'],
 'Fibroblasts': ['Dcn',' Gsn',' Pdgfra'],
 'Endothelial Cells' :['Vwf',' Pecam1',' Cdh5'],
 'Pericytes' :['Rgs5',' Abcc9',' Kcnj8'],
 'Smooth Muscle Cells' :['Myh11',' Tagln',' Acta2'],
 'Myeloid Immune Cells' : ['Cd14',' C1qa',' Cd68'],
 'Lymphoid Immune Cells' :['Cd8a',' Il7r',' Cd40lg'],
 'Adipocytes' : ['Gpam',' Fasn',' Lep'],
 'Neuronal Cells' :['Plp1',' Nrxn1',' Nrxn3'],
 'Mesothelial Cells' :['Msln',' Wt1',' Bnc1']
}

In [107]:
marker_genes_in_data = {}
for ct, markers in marker_genes.items():
    markers_found = []
    for marker in markers:
        if marker in bbknn.var.index:
            markers_found.append(marker)
    marker_genes_in_data[ct] = markers_found

## Plotting

In [108]:
for cell_type, genes in marker_genes.items():
    
    cleaned_genes = [g.strip() for g in genes if g.strip() in bbknn.var_names]

    if cleaned_genes:
        print(f"{cell_type.upper()}:\n  → Plotting: {', '.join(cleaned_genes)}\n")

        for gene in cleaned_genes:
            sc.pl.umap(
                bbknn,
                color=gene,
                vmin=0,
                vmax="p99",
                sort_order=False,
                frameon=False,
                cmap="RdYlBu_r",
                layer='cpm_normalization',
                show=False
            )
            safe_gene = gene.replace(" ", "_").replace("/", "_")
            plt.savefig(
                f"./Github/Nuclear_hashing_2025/figs_bbknn_batch_effect_removed/{cell_type}_{safe_gene}.png",
                dpi=300,
                transparent=True,
                facecolor='none',
                edgecolor='none'
            )
            plt.close()

    else:
        print(f"{cell_type.upper()}:\n  ✗ No valid marker genes found in bbknn.var_names.\n")

    print("\n" + "-"*60 + "\n")

VENTRICULAR CARDIOMYOCYTES:
  → Plotting: Myh7, Myl2, Fhl2




------------------------------------------------------------

ATRIAL CARDIOMYOCYTES:
  → Plotting: Nppa, Myl7, Myl4


------------------------------------------------------------

FIBROBLASTS:
  → Plotting: Dcn, Gsn, Pdgfra


------------------------------------------------------------

ENDOTHELIAL CELLS:
  → Plotting: Vwf, Pecam1, Cdh5


------------------------------------------------------------

PERICYTES:
  → Plotting: Rgs5, Abcc9, Kcnj8


------------------------------------------------------------

SMOOTH MUSCLE CELLS:
  → Plotting: Myh11, Tagln, Acta2


------------------------------------------------------------

MYELOID IMMUNE CELLS:
  → Plotting: Cd14, C1qa, Cd68


------------------------------------------------------------

LYMPHOID IMMUNE CELLS:
  → Plotting: Cd8a, Il7r, Cd40lg


------------------------------------------------------------

ADIPOCYTES:
  → Plotting: Gpam, Fasn, Lep


------------------------------------------------------------

NEURONAL CELLS:
  → Plotti

## Leiden Clustering

In [109]:
sc.tl.leiden(bbknn, resolution=1, key_added="leiden_1")

running Leiden clustering


    finished: found 26 clusters and added
    'leiden_1', the cluster labels (adata.obs, categorical) (0:00:03)


In [110]:
sc.tl.leiden(bbknn, resolution=0.1, key_added="leiden_0.1")

running Leiden clustering
    finished: found 7 clusters and added
    'leiden_0.1', the cluster labels (adata.obs, categorical) (0:00:01)


In [111]:
sc.tl.leiden(bbknn, resolution=0.2, key_added="leiden_0.2")

running Leiden clustering


    finished: found 13 clusters and added
    'leiden_0.2', the cluster labels (adata.obs, categorical) (0:00:01)


In [112]:
sc.tl.leiden(bbknn, resolution=0.3, key_added="leiden_0.3")

running Leiden clustering


    finished: found 15 clusters and added
    'leiden_0.3', the cluster labels (adata.obs, categorical) (0:00:02)


In [113]:
sc.tl.leiden(bbknn, resolution=0.5, key_added="leiden_0.5")

running Leiden clustering


    finished: found 17 clusters and added
    'leiden_0.5', the cluster labels (adata.obs, categorical) (0:00:02)


In [114]:
for leiden in ["leiden_0.1", "leiden_0.2", "leiden_0.3", "leiden_0.5", "leiden_1"]:
    sc.pl.umap(
        bbknn,
        color=leiden,
        frameon=False,
        legend_loc="on data",
        show=False
    )
    plt.savefig(
        f"./Github/Nuclear_hashing_2025/figs_bbknn_batch_effect_removed/umap_{leiden}.png",
        dpi=300,
        transparent=True,
        facecolor='none',
        edgecolor='none'
    )
    plt.close()

# Cluster Annotation

In [115]:
sc.pl.umap(
    bbknn,
    color='leiden_0.3',
    frameon=False,
    legend_loc='on data',
    show=False  
)
plt.savefig(
    "./Github/Nuclear_hashing_2025/figs_bbknn_batch_effect_removed/umap_leiden_0.3.png",
    dpi=300,
    transparent=True,
    facecolor='none',
    edgecolor='none'
)
plt.close()

In [116]:
cl_annotation = {
"0" : "Venticular Cardiomyocytes",
"1" : "Venticular Cardiomyocytes",
"3" : "Atrial Cardiomyocytes",
"2" : "Fibroblasts",
"8" : "Fibroblasts + Adipocytes",
"12" : "Endothelial Cells",
"5" : "Endothelial Cells",
"4" : "Endothelial Cells + Pericytes + Neuronal Cells",
"10" : "Endothelial Cells + Lymphoid Immune Cells",
"6" : "Endothelial Cells + Pericytes + Adipocytes + Neuronal Cells",
"9" : "Endothelial Cells + Pericytes + Adipocytes",
"11" : "Pericytes + Neuronal Cells",
"13" : "Pericytes + Adipocytes",
"14" : "Smooth Muscle Cells",
"7" : "Atrial Cardiomyocytes + Myeloid Immune Cells + Fibroblasts"
}

In [117]:
bbknn.obs["manual_celltype_annotation_specific"] = bbknn.obs['leiden_0.3'].map(cl_annotation)

In [118]:
sc.pl.umap(
    bbknn,
    color=["manual_celltype_annotation_specific"],
    frameon=False,
    show=False, legend_loc='on data'
)
plt.savefig(
    "./Github/Nuclear_hashing_2025/figs_bbknn_batch_effect_removed/umap_manual_specific_anno.png",
    dpi=300,
    transparent=True,
    facecolor='none',
    edgecolor='none'
)
plt.close()

In [119]:
cl_annotation2 = {
"0" : "Venticular Cardiomyocytes",
"1" : "Venticular Cardiomyocytes",
"3" : "Atrial Cardiomyocytes",
"2" : "Fibroblasts",
"8" : "Mixed Cell Types",
"12" : "Endothelial Cells",
"5" : "Endothelial Cells",
"4" : "Mixed Cell Types",
"10" : "Mixed Cell Types",
"6" : "Mixed Cell Types",
"9" : "Mixed Cell Types",
"11" : "Mixed Cell Types",
"13" : "Mixed Cell Types",
"14" : "Smooth Muscle Cells",
"7" : "Mixed Cell Types"
}

In [120]:
bbknn.obs["manual_celltype_annotation_broad"] = bbknn.obs['leiden_0.3'].map(cl_annotation2)

In [121]:
sc.pl.umap(
    bbknn,
    color=["manual_celltype_annotation_broad"],
    frameon=False,
    show=False, legend_loc = 'on data'
)
plt.savefig(
    "./Github/Nuclear_hashing_2025/figs_bbknn_batch_effect_removed/umap_manual_broad_anno.png",
    dpi=300,
    transparent=True,
    facecolor='none',
    edgecolor='none'
)
plt.close()

In [122]:
ax = sc.pl.umap(
    bbknn,
    color=["manual_celltype_annotation_broad"],
    frameon=False,
    show=False,
    legend_loc='right margin' 
)

fig = plt.gcf()

plt.tight_layout(rect=[0, 0, 0.85, 1])  
plt.savefig(
    "./Github/Nuclear_hashing_2025/figs_bbknn_batch_effect_removed/umap_manual_broad_anno_legend_outside.png",
    dpi=300,
    bbox_inches='tight',
    transparent=True,
    facecolor='none'
)
plt.close()

# Differentially Expressed Genes (DEGs)

In [123]:
sc.tl.rank_genes_groups(bbknn, groupby="leiden_0.3", method="wilcoxon",use_raw= False, key_added = 'dea_leiden')

ranking genes


    finished: added to `.uns['dea_leiden']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:19)


In [124]:
sc.pl.rank_genes_groups_dotplot(
    bbknn,
    groupby='leiden_0.3',
    standard_scale='var',
    n_genes=5,
    key='dea_leiden',
    show=False  
)
plt.savefig(
    "./Github/Nuclear_hashing_2025/figs_bbknn_batch_effect_removed/DEG.png",
    dpi=300,
    transparent=True,
    facecolor='none',
    edgecolor='none'
)
plt.close()

    using 'X_pca' with n_pcs = 50
Storing dendrogram info using `.uns['dendrogram_leiden_0.3']`


In [125]:
deg_df = pd.DataFrame(bbknn.uns["dea_leiden"]["names"]).head(100)  
print(deg_df)

                0              1         2       3      4        5         6  \
0           Pcdh7       Ivns1abp    Abca8a    Myl2   Gpc6    Cyyr1  Arhgap15   
1          Tnni3k  D830005E20Rik     Bicc1    Tpm1   Ebf1     Flt1    Slc9a9   
2            Mlip       Ppargc1a     Pcdh9      Mb  Mast4   Adgrl4      Pid1   
3           Pde4b          Esrrg     Rbms3   Actc1  Maml2    Ptprb    Inpp5d   
4         Filip1l          Rbm20     Tenm3   Tnni3  Rbms3   Adgrf5     Ptprc   
..            ...            ...       ...     ...    ...      ...       ...   
95       Cacna2d1         Chchd3  Serping1    Etfb  Prkch     Nrp2    Tm6sf1   
96           Ano4          Rmdn1      Mid1     B2m   Lhfp     Nav3     Acer3   
97          Ptpn4          Casz1  Crispld2  Ndufb8  Lamc1   mt-Co2  Rap1gds1   
98  D830024N08Rik           Nav2     Bmper    Ldhb  Scn7a  mt-Atp6    Srgap2   
99       Ppargc1b           Asb2   Gucy1a2    Mdh2  Lsamp   Zfp366   Rasgrp3   

           7        8        9        1

In [126]:
deg_df.to_csv(f'./Github/Nuclear_hashing_2025/data/DE_genes_HTODemux_raw_manual_annotation_batch_correction_{timestamp}.csv',sep=',')

# Cluster Annotation - based on DEGs

In [127]:
toppfun_annotation = {
"0":"Ventricular Cardiomyocytes",
"1":"Ventricular Cardiomyocytes",
"2":"Fibroblasts",
"3":"Cytoplasmic Cardiomyocytes",
"4":"Mesenchymal",
"5":"Endothelial",
"6":"Immune cells",
"7":"Immune cells",
"8":"Mesenchymal",
"9":"Endothelial",
"10":"Unclear",
"11":"Mesenchymal",
"12":"Endothelial",
"13":"Mesenchymal",
"14":"Smooth muscle cell",
}

In [128]:
bbknn.obs["toppfun_annotation"] = bbknn.obs['leiden_0.3'].map(toppfun_annotation)

In [129]:
sc.pl.umap(
    bbknn,
    color=["toppfun_annotation"],
    frameon=False,
    show=False
)
plt.savefig(
    "./Github/Nuclear_hashing_2025/figs_bbknn_batch_effect_removed/umap_toppfun_anno.png",
    transparent=True,
    dpi=300,
    facecolor='none',
    edgecolor='none'
)
plt.close() 

In [130]:
ax = sc.pl.umap(
    bbknn,
    color=["toppfun_annotation"],
    frameon=False,
    show=False,
    legend_loc='right margin' 
)

fig = plt.gcf()

plt.tight_layout(rect=[0, 0, 0.85, 1])  
plt.savefig(
    "./Github/Nuclear_hashing_2025/figs_bbknn_batch_effect_removed/umap_toppfun_anno_legend_outside.png",
    dpi=300,
    bbox_inches='tight',
    transparent=True,
    facecolor='none'
)
plt.close()

In [131]:
bbknn.write_h5ad(f'./Github/Nuclear_hashing_2025/data/reannotated_manual_toppfun_batch_corrected_HTODemux_raw_{timestamp}.h5ad')