See 0_process_for_deconvolution.ipynb for preprocessing steps

In [None]:
#Ensure packages are imported
import numpy as np
import pandas as pd
from matplotlib import rcParams
import os
import scanpy as sc
import sys
import anndata
from collections import defaultdict
import matplotlib as mpl
import matplotlib.pyplot as plt
import cmocean
import seaborn as sns

from rnasieve.preprocessing import model_from_raw_counts
import altair as alt
from rnasieve.algo import find_mixtures

# Prep for running model

In [None]:
# Restrict both datasets to the genes they share, so the single-cell
# reference and the bulk samples are described over the same variables.
# The intersection is taken from the reference side, exactly as before.
ref_genes = adata_ref.var.index
bulk_genes = adata_bulk.var.index
joint_indices = ref_genes.intersection(bulk_genes)

# Column-subset each AnnData object with the shared gene index.
adata_ref_joint, adata_bulk_joint = (
    adata_ref[:, joint_indices],
    adata_bulk[:, joint_indices],
)

In [None]:
# Raw counts prep
# Builds counts_by_onto_class: a dict mapping each value found in
# obs['cluster_names'] to a (genes x cells) array holding one column per cell
# carrying that label, in the order the cells appear in adata_ref_joint.
# This can take some time depending on number of single cell inputs.
print('Aggregating by ontology class...')
n_ref_genes = adata_ref_joint.n_vars
columns_by_onto_class = defaultdict(list)
for cell_idx in range(adata_ref_joint.n_obs):
    # Named `cell`, not `sc`: `sc` is the scanpy module imported at the top
    # of the notebook and rebinding it would break any later scanpy call.
    cell = adata_ref_joint[cell_idx]
    cell_labels = cell.obs['cluster_names']
    if len(cell_labels) == 0:
        continue
    # .iloc[0] is explicit positional access; plain [0] on a label-indexed
    # Series only works through a deprecated pandas fallback.
    cell_onto_class = cell_labels.iloc[0]
    # assumes .X is a scipy sparse matrix (it must expose .toarray()) -- TODO confirm
    columns_by_onto_class[cell_onto_class].append(
        cell.X.toarray().reshape(-1, 1))

# Stack each class once instead of re-copying a growing matrix for every cell.
# The empty float32 seed keeps the dtype promotion of the incremental version.
counts_by_onto_class = {
    onto_class: np.hstack(
        [np.empty((n_ref_genes, 0), dtype=np.float32), *class_columns])
    for onto_class, class_columns in columns_by_onto_class.items()
}
print('Done!')

In [None]:
# Bulk prep
# Prepares bulk data to be run through model.
# Produces:
#   bulk_by_age - dict: obs['Tumor_type'] value -> list of (genes x 1) columns
#                 (the "age" naming is kept for compatibility; the grouping
#                 key is the Tumor_type column)
#   bulk_labels - one label per column of psis, '<Tumor_type> (<n>)'
#   psis        - (genes x samples) matrix, columns grouped by sorted
#                 Tumor_type, in original sample order within each group
print('Aggregating bulks by age group...')
G = adata_bulk_joint.n_vars
bulk_by_age = defaultdict(list)
for sample_idx in range(adata_bulk_joint.n_obs):
    bulk = adata_bulk_joint[sample_idx]
    # Read the label from the current sample. Looking it up on the whole
    # dataset would assign the first sample's Tumor_type to every bulk.
    sample_types = bulk.obs['Tumor_type']
    if len(sample_types) == 0:
        continue
    age = sample_types.iloc[0]
    # assumes .X is a scipy sparse matrix (it must expose .toarray()) -- TODO confirm
    bulk_by_age[age].append(bulk.X.toarray().reshape(-1, 1))

bulk_labels = []
# Seed with an empty float32 block so psis keeps its (G, 0) float32 form when
# there are no samples, and the same dtype promotion as incremental stacking.
psi_columns = [np.empty((G, 0), dtype=np.float32)]
for age in sorted(bulk_by_age):
    for replicate, bulk_column in enumerate(bulk_by_age[age]):
        # Record which sample group each psis column belongs to.
        bulk_labels.append(f'{age} ({replicate})')
        psi_columns.append(bulk_column)
psis = np.hstack(psi_columns)
print('Done!')

In [None]:
# Set up model
# Every column of psis (one per bulk sample) is handed to the model, so no
# hard-coded sample count has to be edited by hand. To deconvolve only a
# subset, slice explicitly, e.g. psis[:, :n].
# model_from_raw_counts returns two objects, kept here as the model and the
# cleaned bulk matrix that is later passed to model.predict.
model, cleaned_psis = model_from_raw_counts(counts_by_onto_class, psis)

# Run model

In [None]:
# Fit the model to the cleaned bulk matrix and keep the prediction result.
# Expect this to take a while; runtime depends on the number of samples.
output = model.predict(cleaned_psis)

In [None]:
# Evaluate the prediction result as the cell's last expression so the
# notebook renders it.
output

Once the model has finished running, you can export the cell type proportion estimates to the format of your choice (e.g. `.csv`).

# Create output graphs

In [None]:
# Build the long-format "alpha hats" table used for plotting: one row per
# (index value, cell type) pair, with the estimate in 'proportion'.
# reset_index() exposes the row index as a column; it is expected to be named
# 'index', which requires the index of `output` to be unnamed.
alpha_hats_melt = output.reset_index().melt(
    id_vars=['index'],
    var_name='cell_type',
    value_name='proportion',
)

In [None]:
# Optional: give each cell type a rank so plots can use a custom order.
# Example ordering -- edit the list to match the cell types in your reference.
cell_type_order = [
    'Horizontal basal cell', 'Globose basal cell', 'Immediate neural precursor',
    'Immature olfactory sensory neuron', 'Mature olfactory sensory neuron',
    'Microvillar cell', 'Sustentacular cell', "Bowman's Gland",
    'Olfactory ensheathing cell',
    'Respiratory basal cell', 'Respiratory suprabasal cell', 'Respiratory cell',
    'Respiratory ciliated cell', 'Secretory cell', 'Goblet cell',
    'Endothelial cell', 'Smooth muscle cell', 'Fibroblast',
    'Red blood cell', 'Monocyte', 'Macrophage', 'Mast cell', 'Neutrophil',
    'Dendritic cell', 'Plasma cell', 'B cell', 'NK/NKT cell', 'CD4 T cell',
    'CD8 T cell',
]

# Map every listed cell type to its position in the list (0-based).
order_lookup = dict(zip(cell_type_order, range(len(cell_type_order))))

# replace() swaps listed names for their rank; any cell type missing from the
# list is left as its original label.
alpha_hats_melt['order'] = alpha_hats_melt['cell_type'].replace(order_lookup)

In [None]:
# Optionally, specify color palette for each cell type.
# `dom` holds the display labels and `rng` the colour for each label; the two
# lists are paired by position, so they must have the same length.
# For example
dom = [
    'HBC', 'GBC', 'INP', 'iOSN', 'mOSN', 'Microvillar', "Bowman's Gland",
    'Sustentacular', 'Olf. Ensheathing',
    'Resp. Basal', 'Resp. Suprabasal', 'Resp.', 'Resp. Ciliated', 'Secretory',
    'Goblet',
    'Endothelial', 'Smooth Muscle', 'Fibroblasts',
    'RBC', 'Monocytes', 'Macrophages', 'Mast Cells', 'Neutrophils', 'pDC',
    'Plasma Cells', 'B Cells', 'NK/NKT', 'CD4 T', 'CD8 T',
]

# Each row keeps a trailing comma: without one, Python joins adjacent string
# literals ('tan' 'red' -> 'tanred'), which drops a colour and shifts every
# later entry by one.
# NOTE(review): 'gold' is used for both 'Mast Cells' and 'CD4 T' -- confirm
# that the repeated colour is intended.
rng = [
    'thistle', '#e95ea2', '#b61c7e', 'plum', 'darkorchid', '#00bed4',
    'slateblue', 'cyan', 'deepskyblue',
    'yellowgreen', 'limegreen', 'darkseagreen', 'darkgreen', 'yellow', 'olive',
    'sienna', 'gray', 'tan',
    'red', 'lightcoral', 'darkorange', 'gold', 'tomato', 'bisque', 'black',
    'rosybrown', '#a0a09e', 'gold', 'silver',
]

In [None]:
#Create stacked bar chart
