# This notebook is to create pseudobulk cells for each of the 3 cell atlases

## Imports

In [1]:
import scanpy.api as sc
import numpy as np
import pandas as pd
import scanpy
import scipy
import pandas as pd



In [2]:
#Loading datasets
mca = scanpy.read_h5ad('/work/sduknn/Andreas/TM_MCA/MCA/mca_scanpy_no_processing.h5ad')
facs = scanpy.read_h5ad('/work/sduknn/Andreas/TM_MCA/TM/tm_facs_scanpy_no_processing.h5ad')
droplet = scanpy.read_h5ad('/work/sduknn/Andreas/TM_MCA/TM/tm_droplet_scanpy_no_processing.h5ad')

In [4]:
#getting annotation from scmap projection of TM - 10x
mca_ann = pd.read_csv('/work/sduknn/Andreas/TM_MCA/MCA/mca_annotation_projected_from_drop.tsv', sep= '\t')

facs_ann = pd.read_csv('/work/sduknn/Andreas/TM_MCA/TM/facs_annotation_projected_from_drop.tsv', sep= '\t')

In [5]:
mca.obs['original_annotation'] =  mca_ann['original'].values
mca.obs['from_droplet'] =  mca_ann['projected_drom_droplet'].values

facs.obs['original_annotation'] =  facs_ann['original'].values
facs.obs['from_droplet'] =  facs_ann['projected_drom_droplet'].values

## Computing pseudobulk cells

#### Here example with 20 cell pseudobulk cells. (though we used 50 cell pseudobulk)

In [1]:
n_cells = 20

def pseudobulk(anndata = object, tissue = str, celltype = str, n_cells = int):
    """Averages cell expressions of a Scanpy object. Average of n cells from each celltype in each tissue.
    
    anndata : An anndata object
    tissue : A string representing the column name of the tissue attribute within the anndata.obs pandas dataframe
    celltype : Like tissue, but column name of the celltype column
    n-cells : Integer representing the ammount of cells to average over
    
    Returns : An anndata object with the averaged expression profiles
    """
    avg_mat = pd.DataFrame()
    ann_mat = pd.DataFrame()
    meta_mat = pd.DataFrame()
    for tiss in set(anndata.obs[tissue]):
        a = anndata[anndata.obs[tissue] == tiss]
        print('Subsetting tissue: '+ str(tiss))
        
        for clust in set(a.obs[celltype]):
            b = a[a.obs[celltype] == clust]
            print('Subsetting celltype: '+str(clust)+' in '+ str(tiss))
            
            n = b.n_obs // n_cells
            if n != 0:
                x = 1
                for i in range(0, n*n_cells, n_cells):
                    x = x + 1
                    cell_numbers = [*range(i,i+n_cells)]

                    avg = pd.DataFrame(data = b.X[cell_numbers].sum( axis = 0).T, index = b.var_names, columns = [str(tiss)+'_' + str(clust) + '_' +str(x)])
                    avg_mat = avg_mat.append(avg.T)
                    ann_mat = ann_mat.append(pd.DataFrame(data=clust, index = [str(tiss)+'_'+str(clust) + '_' +str(x)], columns = ['celltype']))
                    meta_mat = meta_mat.append(pd.DataFrame(data=tiss, index = [str(tiss)+'_'+str(clust) + '_' +str(x)], columns = ['tissue']))
    scanpy_object = sc.AnnData(X=(scipy.sparse.csr_matrix(avg_mat)), obs=ann_mat, var= pd.DataFrame(data = None, index = avg_mat.columns))
    return(scanpy_object)               

In [10]:
## 20 cell pseudobulk from TM - 10X
droplet = pseudobulk(droplet, 'tissue', 'cell_ontology_class', n_cells)

Subsetting tissue: Tongue
Subsetting celltype: basal cell of epidermis in Tongue
Subsetting celltype: keratinocyte in Tongue
Subsetting celltype: Langerhans cell in Tongue
Subsetting tissue: Lung
Subsetting celltype: nan in Lung
Subsetting celltype: classical monocyte in Lung
Subsetting celltype: leukocyte in Lung
Subsetting celltype: alveolar macrophage in Lung
Subsetting celltype: myeloid cell in Lung
Subsetting celltype: T cell in Lung
Subsetting celltype: natural killer cell in Lung
Subsetting celltype: lung endothelial cell in Lung
Subsetting celltype: ciliated columnar cell of tracheobronchial tree in Lung
Subsetting celltype: non-classical monocyte in Lung
Subsetting celltype: type II pneumocyte in Lung
Subsetting celltype: mast cell in Lung
Subsetting celltype: B cell in Lung
Subsetting celltype: stromal cell in Lung
Subsetting tissue: Mammary_Gland
Subsetting celltype: basal cell in Mammary_Gland
Subsetting celltype: T cell in Mammary_Gland
Subsetting celltype: luminal epithel

In [11]:
droplet

AnnData object with n_obs × n_vars = 2713 × 23433 
    obs: 'celltype'

In [12]:
## 20 cell pseudobulk from TM - SS2
facs = pseudobulk(facs, 'tissue', 'from_droplet', n_cells)

Subsetting tissue: Pancreas
Subsetting celltype: classical monocyte in Pancreas
Subsetting celltype: skeletal muscle satellite cell in Pancreas
Subsetting celltype: endocardial cell in Pancreas
Subsetting celltype: myeloid cell in Pancreas
Subsetting celltype: ciliated columnar cell of tracheobronchial tree in Pancreas
Subsetting celltype: epithelial cell in Pancreas
Subsetting celltype: basal cell of epidermis in Pancreas
Subsetting celltype: mesenchymal cell in Pancreas
Subsetting celltype: immature B cell in Pancreas
Subsetting celltype: mesangial cell in Pancreas
Subsetting celltype: kidney capillary endothelial cell in Pancreas
Subsetting celltype: type II pneumocyte in Pancreas
Subsetting celltype: fibroblast in Pancreas
Subsetting celltype: DN1 thymic pro-T cell in Pancreas
Subsetting celltype: endothelial cell of hepatic sinusoid in Pancreas
Subsetting celltype: mesenchymal stem cell in Pancreas
Subsetting celltype: natural killer cell in Pancreas
Subsetting celltype: endotheli

In [13]:
facs

AnnData object with n_obs × n_vars = 2571 × 23433 
    obs: 'celltype'

In [14]:
## 20 cell pseudobulk from MCA
mca = pseudobulk(mca, 'tissue', 'from_droplet', n_cells)

Subsetting tissue: Uterus
Subsetting celltype: unassigned in Uterus
Subsetting celltype: skeletal muscle satellite cell in Uterus
Subsetting celltype: T cell in Uterus
Subsetting celltype: macrophage in Uterus
Subsetting tissue: Stomach
Subsetting celltype: classical monocyte in Stomach
Subsetting celltype: skeletal muscle satellite cell in Stomach
Subsetting celltype: endocardial cell in Stomach
Subsetting celltype: bladder cell in Stomach
Subsetting celltype: epithelial cell in Stomach
Subsetting celltype: mesenchymal cell in Stomach
Subsetting celltype: mesangial cell in Stomach
Subsetting celltype: hematopoietic precursor cell in Stomach
Subsetting celltype: fibroblast in Stomach
Subsetting celltype: DN1 thymic pro-T cell in Stomach
Subsetting celltype: late pro-B cell in Stomach
Subsetting celltype: T cell in Stomach
Subsetting celltype: endothelial cell in Stomach
Subsetting celltype: macrophage in Stomach
Subsetting celltype: stromal cell in Stomach
Subsetting celltype: basal ce

In [15]:
mca

AnnData object with n_obs × n_vars = 11371 × 39855 
    obs: 'celltype'

In [17]:
mca.write('/work/sduknn/Andreas/TM_MCA/MCA/mca_20_cell_pseudobulk.h5ad')
facs.write('/work/sduknn/Andreas/TM_MCA/TM/facs_20_cell_pseudobulk.h5ad')
droplet.write('/work/sduknn/Andreas/TM_MCA/TM/droplet_20_cell_pseudobulk.h5ad')