### Defining Pseudobulk Groupings by Barcode 

In [None]:
#Import libraries
import scanpy as sc
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
import glob

In [None]:
#Configure scanpy's plotting defaults, requesting a figure resolution of 200 dpi.
FIGURE_DPI = 200
sc.set_figure_params(dpi=FIGURE_DPI)

In [None]:
#Load the AUCell regulon AnnData object from disk.
#Its .obsm attribute is expected to contain the UMAP coordinates.
aucell_h5ad_path = "aucell_regulon_anndata.h5ad"
aucell_regulon_anndata = sc.read_h5ad(aucell_h5ad_path)

In [None]:
#Subset data to target a single batch associated with a .bam file.
#NOTE(review): 'target batch' looks like a placeholder; confirm it matches a real value in obs['Batch'].
#Slicing an AnnData object returns a view of the parent, so take an explicit copy:
#new .obs columns can then be added to the subset without anndata having to
#turn the view into a copy implicitly (which raises ImplicitModificationWarning).
batch_mask = aucell_regulon_anndata.obs['Batch'] == 'target batch'
target_batch = aucell_regulon_anndata[batch_mask].copy()

In [None]:
#Cluster the subset's cells into 20 KMeans groups using their UMAP coordinates
#(random_state is fixed so the run is seeded), then store each cell's cluster
#label, converted to a string, in the subset's .obs['KMeans'] column.
umap_coords = target_batch.obsm['X_umap']
km = KMeans(n_clusters=20, random_state=56).fit(umap_coords)
target_batch.obs['KMeans'] = km.labels_.astype(str)

In [None]:
#Copy the subset's KMeans labels onto the main object that holds the UMAP coordinates.
#The assignment is aligned on the obs index, so cells that are not part of the
#subset end up with a missing value in this column.
batch_labels = target_batch.obs['KMeans']
aucell_regulon_anndata.obs['KMeans'] = batch_labels

In [None]:
#Plot the UMAP coloured by the batch-specific KMeans labels, which will act as cell aggregates.
#The groups argument restricts the colouring to the cluster labels '0' through '19'.
kmeans_groups = list(np.arange(0, 20).astype(str))
sc.pl.umap(
    aucell_regulon_anndata,
    color='KMeans',
    groups=kmeans_groups,
    size=15,
    legend_loc='on data',
    legend_fontsize=10,
    legend_fontoutline=True,
)

In [None]:
#Build the table to export: the subset's obs index together with its KMeans label column.
kmeans_column = target_batch.obs[['KMeans']]
target_batch_out = pd.DataFrame(kmeans_column)

In [None]:
#Write the KMeans label table to CSV; the index is written as the first column.
labels_csv_path = 'Pseudobulk_labels.csv'
target_batch_out.to_csv(labels_csv_path)