# Imports

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt 
import scanpy as sc
from matplotlib import rcParams
import gc
import glob
import numpy as np
from matplotlib.colors import to_hex
import pandas as pd
import matplotlib as mpl

In [None]:
import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")
# Notebook-wide default figure size (inches); several cells change it temporarily and reset it to this value.
rcParams['figure.figsize'] = (4, 3)
# Seaborn "whitegrid" style with the axes grid switched off.
sns.set_style("whitegrid", {'axes.grid' : False})

In [None]:
# Absolute root under which load_adata() looks for `artifacts/<dataset>/models/<model>/...`.
base_path = '/lustre/groups/imm01/workspace/irene.bonafonte/Projects/2023May_nichecompass/nichecompass-reproducibility'

# Functions

In [None]:
def load_adata(suffix, model='reference_query_mapping', dataset='nanostring_cosmx_human_nsclc'):
    """Load the AnnData object saved for one model run and attach batch colours.

    Parameters
    ----------
    suffix
        Trailing part of the model folder name. The folder is searched with the
        glob pattern ``{base_path}/artifacts/{dataset}/models/{model}/*_{suffix}``.
    model
        Name of the model sub-folder; also part of the ``.h5ad`` file name.
    dataset
        Name of the dataset folder; also part of the ``.h5ad`` file name.

    Returns
    -------
    tuple
        ``(adata, adata_path)``: the loaded object and the path it was read from.

    Raises
    ------
    FileNotFoundError
        If no model folder matches the glob pattern.
    """
    pattern = f'{base_path}/artifacts/{dataset}/models/{model}/*_{suffix}'
    matches = glob.glob(pattern)
    if not matches:
        raise FileNotFoundError(f'No model folder matches {pattern}')
    # If several folders match, the first one returned by glob is used (glob order is not sorted).
    model_folder = matches[0]
    adata_path = f"{model_folder}/{dataset}_{model}.h5ad"
    adata = sc.read_h5ad(adata_path)

    from matplotlib.colors import to_hex
    # Eight hand-picked tab20b entries, one fixed colour per known sample.
    batch_colors = np.apply_along_axis(to_hex, 1, np.array(plt.get_cmap('tab20b').colors)[[0,1,3,5,8,11,13,17],:])
    batch_colors = {b: c for b, c in zip(['lung5_rep1','lung5_rep2','lung5_rep3','lung6','lung9_rep1','lung9_rep2','lung12','lung13'], batch_colors)}

    # scanpy pairs `<key>_colors` with the categories of the column by position, so the
    # list has to follow the category order, not the order in which batches first appear.
    batch_obs = adata.obs['batch']
    if isinstance(batch_obs.dtype, pd.CategoricalDtype):
        batch_order = batch_obs.cat.categories
    else:
        batch_order = batch_obs.unique()
    adata.uns['batch_colors'] = [batch_colors[b] for b in batch_order]

    return adata, adata_path

In [None]:
def plot_cluster_proportions(cluster_props, 
                             cluster_palette=None,
                             xlabel_rotation=0,
                             figsize=(9,4),
                             ax=None,
                             figs=None): 
    """Draw a stacked bar chart of a proportions table.

    Parameters
    ----------
    cluster_props
        Table plotted through its ``.plot(kind="bar", stacked=True)`` method.
    cluster_palette
        Optional sequence of colours; blended into a colormap with as many
        colours as the sequence has entries. ``None`` uses the default colours.
    xlabel_rotation
        Rotation (degrees) of the x tick labels.
    figsize
        Size of the figure created when ``ax`` is ``None``.
    ax
        Axis to draw on. When ``None`` a new white-background figure is created.
    figs
        Only rebound when a new figure is created; not used otherwise.

    Returns
    -------
    The axis that was drawn on.
    """
    # A fresh figure is only needed when the caller did not hand in an axis.
    if ax is None:
        figs, ax = plt.subplots(figsize=figsize)
        figs.patch.set_facecolor("white")
        figs.tight_layout()

    if cluster_palette is None:
        bar_cmap = None
    else:
        bar_cmap = sns.palettes.blend_palette(
            cluster_palette,
            n_colors=len(cluster_palette),
            as_cmap=True)

    cluster_props.plot(kind="bar", stacked=True, ax=ax, legend=None, colormap=bar_cmap)

    # A legend is built and immediately discarded, leaving the axis without one.
    discarded_legend = ax.legend(bbox_to_anchor=(1.01, 1), frameon=False, title="Cluster")
    discarded_legend.remove()

    ax.tick_params(axis="x", rotation=xlabel_rotation, bottom=False)
    ax.tick_params(axis="y", rotation=90)
    ax.set_xlabel('Niche', fontsize=20)
    ax.set_ylabel("Proportion", fontsize=20)
    ax.tick_params(axis='both', which='major', labelsize=15)

    # Keep only the left spine, limited to the 0-100 range.
    ax.spines['left'].set_bounds(0, 100)
    for side in ('right', 'bottom', 'top'):
        ax.spines[side].set_visible(False)

    return ax

In [None]:
def plot_latent(adata, model='reference_query_mapping'):
    """Plot overview UMAPs and one UMAP per ``latent_leiden_*`` column.

    For ``model='reference_query_mapping'`` the spatial layout of the first
    batch flagged as ``mapping_entity == 'query'`` is plotted as well, both for
    the general annotations and for every clustering column. For
    ``model='reference'`` only UMAPs are drawn. Stored ``<column>_colors``
    entries of the clustering columns are deleted from ``adata.uns`` before
    plotting. Returns ``None``.
    """
    is_mapping = model == 'reference_query_mapping'

    # General characteristics: UMAP, plus the query sample in space for mapping runs.
    if is_mapping:
        query_batches = adata.obs.batch[adata.obs.mapping_entity=='query'].unique()
        query = query_batches[0]
        sc.pl.umap(adata, color=['mapping_entity','cell type', 'batch','niche'], ncols=4, wspace=0.5, size=0.5)
        sc.pl.embedding(adata[adata.obs.batch==query], basis="spatial", color=['cell type', 'niche'], ncols=2, wspace=0.5, size=1)
    elif model == 'reference':
        sc.pl.umap(adata, color=['batch','cell type','niche'], ncols=3, wspace=0.5, size=0.5)

    # Clusterings: drop any stored colours so the palette passed below is applied.
    cluster_res = adata.obs.columns[adata.obs.columns.str.contains('latent_leiden_')]
    for cl in cluster_res:
        color_key = f'{cl}_colors'
        if color_key in adata.uns:
            del adata.uns[color_key]
    sc.pl.umap(adata, color=cluster_res, ncols=4, wspace=0.5, size=0.5, palette=sc.pl.palettes.vega_20_scanpy)

    if is_mapping:
        sc.pl.embedding(adata[adata.obs.batch==query], basis="spatial", color=cluster_res, ncols=4, wspace=0.5, size=1)

In [None]:
def res_details(adata, resolution=0.4, model='reference_query_mapping'):
    """Show one clustering resolution in space and as composition bar charts.

    First draws the ``latent_leiden_{resolution}`` labels on the spatial
    coordinates of every batch in a fixed 2 x 4 grid (so at most eight batches
    fit). Then, for each of 'cell type', 'niche' and 'batch', draws a stacked
    bar chart with one bar per cluster giving the percentage split of that
    annotation. ``model`` is accepted but not used. Returns ``None``.
    """
    plot_var = f'latent_leiden_{resolution}'

    # Spatial panels: batches fill the grid row by row, four per row.
    fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(21, 6))
    for i, batch in enumerate(adata.obs.batch.unique()):
        row, col = divmod(i, 4)
        sc.pl.embedding(adata[adata.obs.batch==batch], basis="spatial", color=plot_var, size=1, show=False, ax=axes[row,col], title=batch)
    plt.tight_layout()
    plt.show()

    # Composition panels: rows are clusters, columns the annotation levels, values in percent.
    figs, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 3))
    for i, cluster_var in enumerate(['cell type', 'niche', 'batch']):
        counts = adata.obs.groupby([cluster_var, plot_var]).size().reset_index()
        props = counts.pivot(columns=plot_var, index=cluster_var).T
        props.index = props.index.droplevel(0)
        props = props.fillna(0)
        row_totals = props.sum(axis=1)
        props = props.div(row_totals, axis=0)*100
        axes[i] = plot_cluster_proportions(props, xlabel_rotation=90, cluster_palette=adata.uns[f'{cluster_var}_colors'], figsize=(4,3), ax=axes[i], figs=figs)
    figs.show()

In [None]:
from matplotlib.colors import ListedColormap, to_rgb, to_hex
def colorFader(c1, c2='#FFFFFF', n=10, mix=0):
    """Return ``n + 1`` hex colours fading linearly in RGB from ``c1`` towards ``c2``.

    The blend fractions are ``0, 1/(n+1), ..., n/(n+1)``: the first colour is
    ``c1`` itself and ``c2`` is never reached. ``mix`` is accepted but not used.
    """
    start = np.array(to_rgb(c1))
    end = np.array(to_rgb(c2))
    steps = n + 1
    return [to_hex((1-x/steps)*start + end*x/steps) for x in range(steps)]

# Reference 43
no contrastive, L1 0 add on, 1000 masked, decrease kl x10, 0.03 active GPs

## Prepare for analysis

### Select resolution

In [None]:
# Load the 'reference' model run whose folder name ends in '_43'.
adata, adata_path = load_adata(suffix='43', model='reference')
# Figure folder = the h5ad path with 'models' replaced by 'figures' and the file name stripped.
figure_path = adata_path.replace('models','figures').replace('/nanostring_cosmx_human_nsclc_reference.h5ad','')
print(figure_path)
adata

In [None]:
# merge super small cluster
# clustering column -> (label of the small cluster, label it is merged into)
small_cluster_merges = {
    'latent_leiden_0.4': ('11', '6'),
    'latent_leiden_0.45': ('12', '7'),
    'latent_leiden_0.5': ('12', '7'),
}
for res_col, (small_label, target_label) in small_cluster_merges.items():
    adata.obs.loc[(adata.obs[res_col]==small_label).values, res_col] = target_label

# The merged-away labels are now empty categories; drop them.
for res_col in small_cluster_merges:
    adata.obs[res_col] = adata.obs[res_col].cat.remove_unused_categories()

In [None]:
# Overview UMAPs plus one UMAP per 'latent_leiden_*' column (no spatial panels for model='reference').
plot_latent(adata, model='reference')

In [None]:
# Resolution inspected in detail; the mappings and palettes below are keyed on 'latent_leiden_0.45'.
latent_leiden_resolution=0.45
res_details(adata, resolution=latent_leiden_resolution, model='reference')

In [None]:
# Leiden label (resolution 0.45) -> numbered niche name.
leiden2niche = {
    '0': '1- Tumor (stroma border)', '2': '2- Tumor interior', '5': '3- Tumor (neutrophil border)', '7': '4- Tumor interior', '11': '5- Infiltrated tumor',
    '3': '6- Neutrophil expansion', '4': '7- Myeloid cells rich stroma', '1': '8- Stroma', '6': '9- Plasmablast rich stroma', '9': '10- Lymphoid rich stroma', '8': '11- Lymphoid aggregates', '10': '12- Macrophage rich stroma'
}

# Leiden label -> the number used as prefix of the niche name above, as a string.
leiden2leiden = {'0': '1', '2': '2', '5': '3', '7': '4', '11': '5', '3': '6', '4': '7', '1': '8', '6': '9', '9': '10', '8': '11', '10': '12'}
adata.obs['niche'] = adata.obs['latent_leiden_0.45'].map(leiden2niche)
adata.obs['leidenOrd'] = adata.obs['latent_leiden_0.45'].map(leiden2leiden)

In [None]:
# Create the container only when it is missing, so groups stored for other
# clusterings are kept when this cell is re-run.
if 'cluster_groups' not in adata.uns:
    adata.uns['cluster_groups'] = {}

# Groups expressed with the original Leiden labels (resolution 0.45).
adata.uns['cluster_groups']["latent_leiden_0.45"] = {
    'tumor_clusters': ['0', '2', '5', '7', '11'],
    'stroma_clusters': ['3', '4', '1', '6', '9', '8', '10'],
    'neutrophil_clusters': ['3', '4'],
    'macrophage_clusters': ['10'],
    'lymphoid_clusters': ['6', '9', '8']
}

# The same groups expressed with the reordered 'leidenOrd' labels.
adata.uns['cluster_groups']["leidenOrd"] = {
    'tumor_clusters': ['1', '2', '3', '4', '5'],
    'stroma_clusters': ['6', '7', '8', '9', '10', '11', '12'],
    'neutrophil_clusters': ['6', '7'],
    'macrophage_clusters': ['12'],
    'lymphoid_clusters': ['9', '10', '11']
}

### Dendrogram

In [None]:
# Wide, short figure for the dendrogram only.
rcParams['figure.figsize'] = (6, 1.5)
# Dendrograms are computed on the NicheCompass latent representation, using all of its columns.
sc.tl.dendrogram(adata, groupby="latent_leiden_0.45", use_rep='nichecompass_latent', n_pcs=adata.obsm['nichecompass_latent'].shape[1])
sc.tl.dendrogram(adata, groupby="niche", use_rep='nichecompass_latent', n_pcs=adata.obsm['nichecompass_latent'].shape[1])
# Only the niche-level dendrogram is drawn.
sc.pl.dendrogram(adata, groupby="niche", orientation='bottom', show=True)
# Back to the notebook default size.
rcParams['figure.figsize'] = (4, 3)

### Define palette

In [None]:
# Display the stored cluster groupings.
adata.uns['cluster_groups']

In [None]:
# Display the ordered categories stored with the 'latent_leiden_0.45' dendrogram.
adata.uns[f'dendrogram_latent_leiden_0.45']['categories_ordered']

In [None]:
# Preview strip of the 'Dark2' colormap, from which the niche palette is picked.
rcParams['figure.figsize'] = (4, 0.5)
general=np.apply_along_axis(to_hex, 1, np.array(plt.get_cmap('Dark2').colors))
a=np.outer(np.ones(len(general)),np.arange(0,1,0.01))   # pseudo image data
plt.imshow(a,aspect='auto',cmap=plt.get_cmap('Dark2'),origin="lower")
# Back to the notebook default size.
rcParams['figure.figsize'] = (4, 3)

In [None]:
# use dendrogram order to define niche palette related to cell type
general=np.apply_along_axis(to_hex, 1, np.array(plt.get_cmap('Dark2').colors))
# colorFader(..., n=k) returns k + 1 shades; only the first ones are used below.
tumor=colorFader(general[2], c2='#FFFFFF', n=5, mix=0)
lymphoid=general[4]
myeloid=general[1]
Blike=colorFader(general[0], c2='#FFFFFF', n=2, mix=0)
neutrophil=colorFader(general[3], c2='#FFFFFF', n=2, mix=0)
stroma=general[5]


# Colour per original Leiden label (resolution 0.45).
leiden_colDict = {
    '0': tumor[0], '2': tumor[1], '5': tumor[2], '7': tumor[3], '11': tumor[4], 
    '3': neutrophil[0], '4': neutrophil[1], 
    '1': stroma, '6': Blike[0], '9': Blike[1], '8': lymphoid, '10': myeloid 
}
# The same colours keyed by the derived labels.
leidenOrd_colDict = {leiden2leiden[k]: v for k, v in leiden_colDict.items()}
niche_colDict = {leiden2niche[k]: v for k, v in leiden_colDict.items()}

# `<key>_colors` is paired with the categories of the column by position, so each
# list is built in category order; dict insertion order would attach colours to the
# wrong labels whenever the two orders differ.
adata.uns['latent_leiden_0.45_colors'] = [leiden_colDict[c] for c in adata.obs['latent_leiden_0.45'].cat.categories]
adata.uns['leidenOrd_colors'] = [leidenOrd_colDict[c] for c in adata.obs['leidenOrd'].cat.categories]
adata.uns['niche_colors'] = [niche_colDict[c] for c in adata.obs['niche'].cat.categories]

In [None]:
# Write the annotated object back to the path it was loaded from (overwrites that file).
adata.write_h5ad(adata_path)

## Overview plots

In [None]:
# Larger font and resolution for the overview figures below.
sc.set_figure_params(fontsize=14, dpi=120)

In [None]:
# UMAP coloured by niche, saved as a1.pdf; the figure size is enlarged for this plot and then reset.
rcParams['figure.figsize'] = (8, 6)
sc.pl.umap(adata, color=['niche'], ncols=1, wspace=1, size=2.5, legend_fontsize='x-large', show=False, frameon=False, title=[''])
plt.savefig(f"{figure_path}/a1.pdf", bbox_inches="tight", format='pdf')
rcParams['figure.figsize'] = (4, 3)


In [None]:
# UMAP coloured by the numeric niche label, with the labels drawn on the data; saved as a1_num.pdf.
rcParams['figure.figsize'] = (8, 6)
sc.pl.umap(adata, color=['leidenOrd'], ncols=1, wspace=1, size=2.5, legend_fontsize='x-large', show=False, frameon=False, title=[''], legend_loc='on data')
plt.savefig(f"{figure_path}/a1_num.pdf", bbox_inches="tight", format='pdf')
rcParams['figure.figsize'] = (4, 3)

In [None]:
# UMAP coloured by cell type, saved as a2.pdf.
rcParams['figure.figsize'] = (8, 6)
sc.pl.umap(adata, color=['cell type'], ncols=1, wspace=1, size=2.5, legend_fontsize='x-large', show=False, frameon=False, title=[''])
plt.savefig(f"{figure_path}/a2.pdf", bbox_inches="tight", format='pdf')
rcParams['figure.figsize'] = (4, 3)

In [None]:
# Readable sample labels derived from the batch names, e.g. 'lung5_rep1' -> 'Donor 5 r1'.
adata.obs['donor'] = adata.obs.batch.str.replace('_',' ')
adata.obs['donor'] = adata.obs.donor.str.replace('lung','Donor ')
adata.obs['donor'] = adata.obs.donor.str.replace('rep','r')
# NOTE(review): reusing the batch colour list assumes the donor categories end up in the
# same order as the batch categories - confirm.
adata.uns['donor_colors'] = adata.uns['batch_colors']

# UMAP coloured by donor, saved as a3.pdf.
rcParams['figure.figsize'] = (8, 6)
sc.pl.umap(adata, color=['donor'], ncols=1, wspace=1, size=2.5, legend_fontsize='x-large', show=False, frameon=False, title=[''])
plt.savefig(f"{figure_path}/a3.pdf", bbox_inches="tight", format='pdf')
rcParams['figure.figsize'] = (4, 3)

In [None]:
# One column per batch: cell types in the top row, numeric niche labels in the bottom row.
# Note that this rebinds the notebook-level name `n` to the number of batches.
n = adata.obs.batch.nunique()
fig, axes = plt.subplots(nrows=2, ncols=n, figsize=(8*n,6*2))

for i, batch in enumerate(adata.obs.batch.unique()):
    ax = sc.pl.embedding(adata[adata.obs.batch==batch], basis="spatial", color=['cell type'], size=6, legend_loc=None, frameon=False, title=[''], ax=axes[0,i], show=False)
    ax = sc.pl.embedding(adata[adata.obs.batch==batch], basis="spatial", color=['leidenOrd'], size=6, legend_loc=None, frameon=False, title=[''], ax=axes[1,i], show=False)

# Pack the panels tightly before saving.
fig.tight_layout()
fig.subplots_adjust(wspace=0.01, hspace=0.01)
plt.savefig(f"{figure_path}/histology_h.pdf", bbox_inches="tight", format='pdf')

In [None]:
# Percentage split of batches within each niche (one stacked bar per 'leidenOrd' label).
cluster_var = 'batch'
plot_var = 'leidenOrd'
# Count cells per (batch, niche) pair, then reshape to rows = niche, columns = batch.
props = adata.obs.groupby([cluster_var, plot_var]).size().reset_index()
props = props.pivot(columns=plot_var, index=cluster_var).T
props.index = props.index.droplevel(0)
props.fillna(0, inplace=True)
# Normalise each row to 100.
props = props.div(props.sum(axis=1), axis=0)*100 
# `fig` holds the Axes returned by plot_cluster_proportions, not a Figure.
fig = plot_cluster_proportions(props, xlabel_rotation=90, cluster_palette=adata.uns[f'{cluster_var}_colors'], figsize=(8,3))
plt.savefig(f"{figure_path}/a3_prop.pdf", bbox_inches="tight", format='pdf')


In [None]:
# Same batch composition plot, restricted to the cells of niches '3' and '5'.
cluster_var = 'batch'
plot_var = 'leidenOrd'
props = adata[adata.obs.leidenOrd.isin(['3','5'])].obs.groupby([cluster_var, plot_var]).size().reset_index()
props = props.pivot(columns=plot_var, index=cluster_var).T
props.index = props.index.droplevel(0)
props.fillna(0, inplace=True)
# Normalise each row to 100.
props = props.div(props.sum(axis=1), axis=0)*100 
# `fig` holds the Axes returned by plot_cluster_proportions, not a Figure.
fig = plot_cluster_proportions(props, xlabel_rotation=90, cluster_palette=adata.uns[f'{cluster_var}_colors'], figsize=(2,3))
plt.savefig(f"{figure_path}/i.pdf", bbox_inches="tight", format='pdf')

In [None]:
# Percentage split of cell types within each niche (one stacked bar per 'leidenOrd' label).
cluster_var = 'cell type'
plot_var = 'leidenOrd'
# Count cells per (cell type, niche) pair, then reshape to rows = niche, columns = cell type.
props = adata.obs.groupby([cluster_var, plot_var]).size().reset_index()
props = props.pivot(columns=plot_var, index=cluster_var).T
props.index = props.index.droplevel(0)
props.fillna(0, inplace=True)
# Normalise each row to 100.
props = props.div(props.sum(axis=1), axis=0)*100 
# `fig` holds the Axes returned by plot_cluster_proportions, not a Figure.
fig = plot_cluster_proportions(props, xlabel_rotation=90, cluster_palette=adata.uns[f'{cluster_var}_colors'], figsize=(8,3))
plt.savefig(f"{figure_path}/a2_prop.pdf", bbox_inches="tight", format='pdf')

In [None]:
# Cell type composition restricted to cells whose niche name does not contain 'tumor' (case-insensitive).
cluster_var = 'cell type'
plot_var = 'leidenOrd'
props = adata[~adata.obs.niche.str.contains('tumor', case=False)].obs.groupby([cluster_var, plot_var]).size().reset_index()
props = props.pivot(columns=plot_var, index=cluster_var).T
props.index = props.index.droplevel(0)
props.fillna(0, inplace=True)
# Normalise each row to 100.
props = props.div(props.sum(axis=1), axis=0)*100 
# `fig` holds the Axes returned by plot_cluster_proportions, not a Figure.
fig = plot_cluster_proportions(props, xlabel_rotation=90, cluster_palette=adata.uns[f'{cluster_var}_colors'], figsize=(5.5,3))
plt.savefig(f"{figure_path}/j.pdf", bbox_inches="tight", format='pdf')

In [None]:
# Table with rows = cell type and columns = niche; each row sums to 100,
# i.e. how the cells of one cell type are distributed over the niches.
cluster_var='niche'
plot_var='cell type'

props = adata.obs.groupby([cluster_var, plot_var]).size().reset_index()
props = props.pivot(columns=plot_var, index=cluster_var).T
props.index = props.index.droplevel(0)
props.fillna(0, inplace=True)
props = props.div(props.sum(axis=1), axis=0)*100 
props

In [None]:
# Sum of the first five columns in the first row of the table above.
props.iloc[0,0:5].sum()

In [None]:
# Table with rows = niche and columns = cell type; each row sums to 100,
# i.e. the cell type composition of each niche.
plot_var='niche'
cluster_var='cell type'

props = adata.obs.groupby([cluster_var, plot_var]).size().reset_index()
props = props.pivot(columns=plot_var, index=cluster_var).T
props.index = props.index.droplevel(0)
props.fillna(0, inplace=True)
props = props.div(props.sum(axis=1), axis=0)*100 
props

In [None]:
# Fraction of the 'tumor' cells of batch 'lung12' that fall into each niche.
adata[(adata.obs.batch=='lung12') & (adata.obs['cell type']=='tumor')].obs.niche.value_counts()/adata[(adata.obs.batch=='lung12') & (adata.obs['cell type']=='tumor')].shape[0]

In [None]:
# Smaller font and resolution for the sections that follow.
sc.set_figure_params(fontsize=12, dpi=100)

## Niche characterization

In [None]:
# Scaled expression is stored as its own layer; adata.X itself is left as is (copy=True).
adata.layers["scaled"] = sc.pp.scale(adata, copy=True).X

# Marker genes per cell type, in the order they appear in the heatmap columns.
markers = {
    'B-cell': ['MS4A1', 'CD37', 'CD79A','CD19'],
    'NK/T cell': ['CD2','IL7R','CD3G','CTLA4','CD69','GZMA','CD28','TIGIT'],
    'endothelial': ['VWF','FLT1','CDH5','CLEC14A','RAMP2'],
    'epithelial': ['CCL20','LAMP3','AQP3'],
    'fibroblast': ['COL1A1','COL3A1','FN1'],
    'mast': ['TPSB2', 'TPSAB1', 'CPA3'],
    'DC/monocyte': ['CD163','CD74','HLA-DRB1','LYZ','FCGR3A','CD68','CD14'], # DC, monocyte, macrophage (SPP1 or not)
    'macrophage': ['MARCO', 'SPP1', 'C1QC', 'GPNMB'],
    'neutrophil': ['CXCL8','CXCR1','CXCR2','IL1R2'],
    'plasmablast': ['IGKC','IGHG1','JCHAIN','XBP1','MZB1', 'CD38'],
}
markers_list = [gene for genes in markers.values() for gene in genes]

# For every niche whose name does not contain 'umor', keep the cell types
# that make up more than 10% of its cells.
keep_niche = []
niche_name = []
for niche in adata.obs.niche.unique():
    if 'umor' in niche:
        continue
    niche_view = adata[adata.obs.niche == niche]
    counts = niche_view.obs['cell type'].value_counts()
    props = counts / len(niche_view)
    keep = counts[props > 0.1].index.values
    keep_niche.append(keep.astype(str).tolist())
    niche_name.append(niche)

In [None]:
# set up grid
# Figure width and resolution used for the marker figure built below.
TEXT_WIDTH = 6.7261  # in
DPI = 360

def set_font_size(font_size):
    """Set matplotlib's rc font sizes relative to ``font_size``.

    Default text, axes titles/labels, legend title and figure title use
    ``font_size``; tick labels use ``font_size - 2``; legend entries use
    ``font_size - 4``.
    """
    rc_settings = [
        ('font', {'size': font_size}),            # default text
        ('axes', {'titlesize': font_size}),       # axes title
        ('axes', {'labelsize': font_size}),       # x and y labels
        ('xtick', {'labelsize': font_size-2}),    # x tick labels
        ('ytick', {'labelsize': font_size-2}),    # y tick labels
        ('legend', {'fontsize': font_size-4, 'title_fontsize': font_size}),
        ('figure', {'titlesize': font_size}),     # figure title
    ]
    for group, params in rc_settings:
        plt.rc(group, **params)
    
# Global style for the marker figure; these settings stay active for later cells as well.
sns.set(context='paper', style='whitegrid')
plt.rc('grid', linewidth=0.3)
sns.set_palette('colorblind')
sc.set_figure_params(vector_friendly=True, dpi_save=DPI)
set_font_size(10)
plt.rcParams['figure.constrained_layout.use'] = True


# Figure twice as wide as high.
fig = plt.figure(figsize=(TEXT_WIDTH, TEXT_WIDTH * 0.5), dpi=DPI)
gridspecs = {}

# Two columns: bar plots (narrow, left) and heatmaps (four times wider, right).
gridspecs["columns"] = mpl.gridspec.GridSpec(
    figure=fig,
    nrows=1,
    ncols=2,
    height_ratios=[1],
    width_ratios=[1, 4],
)

# One row per kept niche in each column; row height follows the number of cell types kept for that niche.
gridspecs['barplots'] = mpl.gridspec.GridSpecFromSubplotSpec(
    subplot_spec=gridspecs["columns"][0],
    nrows=len(niche_name),
    ncols=1,
    height_ratios=[len(x) for x in keep_niche],
    width_ratios=[1],
    hspace=0.35,
)
gridspecs['heatmaps'] = mpl.gridspec.GridSpecFromSubplotSpec(
    subplot_spec=gridspecs["columns"][1],
    nrows=len(niche_name),
    ncols=1,
    height_ratios=[len(x) for x in keep_niche],
    width_ratios=[1],
    hspace=0.35,
)

# Axes are stored by row index so the plotting cell can fill them niche by niche.
ax_barplots, ax_heatmaps = {}, {}
for i in range(len(keep_niche)):
    ax_barplots[i] = fig.add_subplot(gridspecs['barplots'][i])
    ax_heatmaps[i] = fig.add_subplot(gridspecs['heatmaps'][i])    

In [None]:
# cell type proportion
# Cell type -> stored colour, pairing the categories with `cell type_colors` by position.
batch_col_dict = pd.Series({cell: color for cell, color in zip(adata.obs['cell type'].cat.categories, adata.uns['cell type_colors'])})
for i, niche in enumerate(niche_name):
    # Fraction of the niche's cells per cell type, restricted to the kept cell types.
    props = (adata[adata.obs.niche == niche].obs['cell type'].value_counts() / len(adata[adata.obs.niche == niche]))
    props = pd.DataFrame({'cell type': keep_niche[i],'proportion': props[keep_niche[i]], 'color': batch_col_dict[keep_niche[i]]})
    
    # Bars are shaded by their own value ('Blues'); the 'color' column is not used for plotting.
    sns.barplot(props, x='proportion', y='cell type', hue='proportion', ax=ax_barplots[i], dodge=False, palette='Blues' , width=0.8)
    ax_barplots[i].legend().remove()
    ax_barplots[i].tick_params(axis="x", rotation=90, bottom=False)
    ax_barplots[i].tick_params(axis="y", rotation=0)
    ax_barplots[i].set_xlabel('')
    ax_barplots[i].set_ylabel('')
    ax_barplots[i].tick_params(axis='both', which='major', labelsize=6)
    ax_barplots[i].spines.left.set_visible(False)
    ax_barplots[i].spines.right.set_visible(False)
    ax_barplots[i].spines.bottom.set_visible(False)
    ax_barplots[i].spines.top.set_visible(False)
    # Flip the x axis so bars grow towards the left, with the cell type labels on the right
    # next to the heatmap rows.
    ax_barplots[i].set_xlim(ax_barplots[i].get_xlim()[1], ax_barplots[i].get_xlim()[0])
    ax_barplots[i].yaxis.set_label_position('right')
    ax_barplots[i].yaxis.set_ticks_position('right')
    ax_barplots[i].get_xaxis().set_visible(False)
    ax_barplots[i].tick_params(axis='y', which='both',length=0)

# cell type markers
for i, niche in enumerate(niche_name):
    # Mean scaled expression of every marker gene per kept cell type within the niche.
    adata_tmp = adata[(adata.obs.niche == niche) & adata.obs['cell type'].isin(keep_niche[i])]
    data = pd.DataFrame(adata_tmp[:,markers_list].layers['scaled'], index=adata_tmp.obs_names, columns=markers_list)
    data['cell type'] = adata_tmp.obs['cell type'].values
    data = data.groupby('cell type').mean()
    # Same row order as the bar plot of this niche.
    data = data.loc[keep_niche[i],:]
    
    ax_heatmaps[i] = sns.heatmap(data, cmap='RdBu_r', center=0, cbar=False, linewidth=0.1, linecolor='gray', square=False, xticklabels=False, yticklabels=False, ax=ax_heatmaps[i])        
    # ax_heatmaps[i] = sns.heatmap(data, cmap='RdBu_r', center=0, cbar=False, linewidth=0.1, linecolor='gray', square=False, xticklabels=True, yticklabels=False, ax=ax_heatmaps[i]) # to get gene labels
    ax_heatmaps[i].set_ylabel('')
    ax_heatmaps[i].set_xlabel('')
    ax_heatmaps[i].yaxis.set_label_position('right')
    ax_heatmaps[i].patch.set_edgecolor('black')
    ax_heatmaps[i].patch.set_linewidth(1)

# Save next to the other figures of the loaded run instead of a hard-coded run folder.
fig.savefig(f"{figure_path}/markers_plot.pdf", dpi=DPI, bbox_inches='tight')

## Niche highlight plots

In [None]:
# add gp
# Store the negated latent score of the selected gene program as an obs column.
gp = 'CXCL1_ligand_receptor_GP'
# NOTE(review): the boolean mask keeps a 2-D slice with one column per matching name;
# this assumes exactly one active GP carries this name - confirm.
adata.obs[gp] = - adata.obsm['nichecompass_latent'][:,adata.uns['nichecompass_active_gp_names']==gp]

# One row per batch: selected niches (left), cell types (middle), GP score (right).
rcParams['figure.figsize'] = (8, 6)
batches = ['lung9_rep2','lung12']
nr = len(batches)
nc = 3
fig, axes = plt.subplots(nrows=nr, ncols=nc, figsize=(8*nc,6*nr))
for i, batch in enumerate(batches):
    ax = sc.pl.embedding(adata[adata.obs.batch==batch], basis="spatial", color=['cell type'], size=6, frameon=False, title=[''], ax=axes[i,1], show=False, legend_loc=None)
    ax = sc.pl.embedding(adata[adata.obs.batch==batch], basis="spatial", color=['niche'], size=6, frameon=False, title=[''], ax=axes[i,0], show=False, legend_loc=None, groups=['1- Tumor (stroma border)','3- Tumor (neutrophil border)','6- Neutrophil expansion'])
    ax = sc.pl.embedding(adata[adata.obs.batch==batch], basis="spatial", color=['CXCL1_ligand_receptor_GP'], size=10, frameon=False, title=[''], ax=axes[i,2], show=False, colorbar_loc=None, cmap='RdGy_r')
    
fig.tight_layout()
fig.subplots_adjust(wspace=0.01, hspace=0.01)

# This cell contains no savefig call, so the figure is not written to disk here.
rcParams['figure.figsize'] = (4, 3)

In [None]:
# One column per batch: the two highlighted niches in the top row, cell types in the bottom row.
batches = ['lung5_rep1','lung12']
n = len(batches)
fig, axes = plt.subplots(nrows=2, ncols=n, figsize=(8*n,6*2))
for i, batch in enumerate(batches):
    ax = sc.pl.embedding(adata[adata.obs.batch==batch], basis="spatial", color=['cell type'], size=6, frameon=False, title=[''], ax=axes[1,i], show=False, legend_loc=None)
    ax = sc.pl.embedding(adata[adata.obs.batch==batch], basis="spatial", color=['niche'], size=6, groups=['9- Plasmablast rich stroma','11- Lymphoid aggregates'], frameon=False, title=[''], ax=axes[0,i], show=False, legend_loc=None)

fig.tight_layout()
fig.subplots_adjust(wspace=0.01, hspace=0.01)
plt.savefig(f"{figure_path}/l.pdf", bbox_inches="tight", format='pdf')

In [None]:
# highlights
batches = ['lung5_rep1','lung9_rep2']
n = len(batches)
fig, axes = plt.subplots(nrows=2, ncols=n, figsize=(8*n,6*2))
for i, batch in enumerate(batches):
    ax = sc.pl.embedding(adata[adata.obs.batch==batch], basis="spatial", color=['cell type'], size=6, frameon=False, title=[''], ax=axes[1,i], show=False, legend_loc=None)
    ax = sc.pl.embedding(adata[adata.obs.batch==batch], basis="spatial", color=['niche'], size=6, groups=['6- Neutrophil expansion','7- Myeloid cells rich stroma'], frameon=False, title=[''], ax=axes[0,i], show=False, legend_loc=None)

fig.tight_layout()
fig.subplots_adjust(wspace=0.01, hspace=0.01)
plt.savefig(f"{figure_path}/m.pdf", bbox_inches="tight", format='pdf')

## Neighborhood composition

In [None]:
from sklearn.neighbors import NearestNeighbors
leiden_col_key='leidenOrd'
n=25

# compute n neighbours per cell type
# For every cell, count the cell types among its n nearest spatial neighbours within
# the same batch. The fitted points are also the query points, so each cell is
# normally returned as one of its own neighbours.
knn = {}
cell_counts = {}
for b in adata.obs.batch.unique():
    adata_b = adata[adata.obs.batch==b]  # subset once per batch
    X = adata_b.obsm['spatial']
    # Fixed-width string array sized to the longest label (no truncation).
    celltypes = adata_b.obs['cell type'].astype(str).values.astype(str)
    cellnames = adata_b.obs_names

    neighbour_idx = NearestNeighbors(n_neighbors=n).fit(X).kneighbors(X, return_distance=False)
    # Replace neighbour indices by the neighbours' cell types: shape (cells, n).
    knn[b] = celltypes[neighbour_idx]

    for name, neighbour_types in zip(cellnames, knn[b]):
        unique, counts = np.unique(neighbour_types, return_counts=True)
        cell_counts[name] = dict(zip(unique, counts))

# One row per cell, one column per cell type; cell types absent from a neighbourhood count as 0.
adata.obsm[f'k{n}_neighbours_celltype'] = pd.DataFrame(cell_counts).T.fillna(0)

In [None]:
leiden_col_key='leidenOrd'
for structure, groups in adata.uns['cluster_groups'][leiden_col_key].items():
    # format for plotting and keep only those with an interesting number
    # Work on a copy so the cluster column is not written into the table stored in adata.obsm.
    dt = adata.obsm[f'k{n}_neighbours_celltype'].copy()
    dt[leiden_col_key] = adata.obs[leiden_col_key]
    dt = dt.loc[dt[leiden_col_key].isin(groups),:]
    dt[leiden_col_key] = dt[leiden_col_key].cat.remove_unused_categories()
    dt = dt.reset_index()
    # Long format: one row per (cell, neighbouring cell type) with the neighbour count.
    dt = pd.melt(dt, id_vars=['index',leiden_col_key], value_name=f'n NN/{n}', var_name='cell type')
    dt['cell type'] = pd.Categorical(dt['cell type'], categories=adata.obs['cell type'].cat.categories)

    # Per cell type: the highest mean neighbour count reached in any cluster of the group.
    max_mean = dt.groupby(['cell type',leiden_col_key])[f'n NN/{n}'].mean().groupby(['cell type']).max()
    # Keep cell types that are neither rare nor dominant; the lower bound is laxer for tumor clusters.
    if structure == 'tumor_clusters':
        include = (max_mean > n*0.05) & (max_mean < n*0.6)
        rcParams['figure.figsize'] = (len(groups)*1.5, 4)
    else:
        include = (max_mean > n*0.15) & (max_mean < n*0.6)
        rcParams['figure.figsize'] = (len(groups)*2, 4)
    dt = dt[dt['cell type'].isin(include[include].index.values)]
    dt['cell type'] = dt['cell type'].cat.remove_unused_categories()

    # plot
    # `include` follows the cell type category order, so its mask selects the matching stored colours.
    sns.boxplot(data=dt, x=leiden_col_key, y=f'n NN/{n}', hue='cell type', palette=adata.uns[f'cell type_colors'][include.values], showfliers = False).set(title='')
    plt.ylim((0, n))  # a count cannot exceed the number of neighbours
    plt.xticks(rotation=0)
    plt.xlabel("", fontsize=17.5)
    plt.ylabel(f'neighborhood composition', fontsize=17.5)
    plt.legend(frameon=False, loc='center left', bbox_to_anchor=(1, 0.7), fontsize=15)
    plt.tick_params(bottom=False, labelsize=17.5)
    sns.despine(offset=10, trim=True, bottom=True)
    # NOTE(review): `b` is not set in this cell; it is whatever batch name an earlier cell
    # left behind, although the plot covers all batches - confirm the intended file name.
    plt.savefig(f"{figure_path}/spatial_{structure}_{b}.pdf", bbox_inches="tight", format='pdf')
    plt.show()
rcParams['figure.figsize'] = (4, 3)

In [None]:
# only sample 12
from sklearn.neighbors import NearestNeighbors
leiden_col_key='leidenOrd'
n=25

# compute n neighbours per cell type
# The fitted points are also the query points, so each cell is normally returned
# as one of its own neighbours.
knn = {}
cell_counts = {}
b = 'lung12'
adata_b = adata[adata.obs.batch==b]  # subset once
X = adata_b.obsm['spatial']
# Fixed-width string array sized to the longest label (no truncation).
celltypes = adata_b.obs['cell type'].astype(str).values.astype(str)
cellnames = adata_b.obs_names

neighbour_idx = NearestNeighbors(n_neighbors=n).fit(X).kneighbors(X, return_distance=False)
# Replace neighbour indices by the neighbours' cell types: shape (cells, n).
knn[b] = celltypes[neighbour_idx]

for name, neighbour_types in zip(cellnames, knn[b]):
    unique, counts = np.unique(neighbour_types, return_counts=True)
    cell_counts[name] = dict(zip(unique, counts))

# One row per lung12 cell, one column per cell type, plus the cell's niche label.
res = pd.DataFrame(cell_counts).T.fillna(0)
res[leiden_col_key] = adata.obs.loc[res.index,leiden_col_key]

structure='tumor_clusters'
# `groups` is looked up but the selection below uses the hard-coded niches '3' and '5'.
groups=adata.uns['cluster_groups'][leiden_col_key][structure]
    
# format for plotting and keep only those with an interesting number
dt = res.copy()
dt = dt.loc[dt[leiden_col_key].isin(['3','5']),:]
dt[leiden_col_key] = dt[leiden_col_key].cat.remove_unused_categories()
dt = dt.reset_index()
# Long format: one row per (cell, neighbouring cell type) with the neighbour count.
dt = pd.melt(dt, id_vars=['index',leiden_col_key], value_name=f'n NN/{n}', var_name='cell type')
dt['cell type'] = pd.Categorical(dt['cell type'], categories=adata.obs['cell type'].cat.categories)
# Per cell type: the highest mean neighbour count reached in either niche.
max_mean = dt.groupby(['cell type',leiden_col_key])[f'n NN/{n}'].mean().groupby(['cell type']).max()
include = (max_mean > n*0.05) & (max_mean < n*0.6)
dt = dt[dt['cell type'].isin(include[include].index.values)]
dt['cell type'] = dt['cell type'].cat.remove_unused_categories()

# plot
rcParams['figure.figsize'] = (2*1.5, 4)
# `include` follows the cell type category order, so its mask selects the matching stored colours.
sns.boxplot(data=dt, x=leiden_col_key, y=f'n NN/{n}', hue='cell type', palette=adata.uns[f'cell type_colors'][include.values], showfliers = False).set(title='')
plt.ylim((0, n))  # a count cannot exceed the number of neighbours
plt.xticks(rotation=0)
plt.xlabel("", fontsize=17.5)
plt.ylabel(f'neighborhood composition', fontsize=17.5)
plt.legend(frameon=False, loc='center left', bbox_to_anchor=(1, 0.7), fontsize=15)
plt.tick_params(bottom=False, labelsize=17)
sns.despine(offset=10, trim=True, bottom=True)
plt.savefig(f"{figure_path}/spatial_{structure}_{b}.pdf", bbox_inches="tight", format='pdf')
plt.show()
rcParams['figure.figsize'] = (4, 3)

### cell mixing with symmetric graph

In [None]:
import squidpy as sq

# For several neighbourhood sizes, count per cell how many of its spatial
# neighbours belong to each cell type. The k-NN graph is built separately for
# every batch and symmetrised before counting.
for n in [4, 25, 50]:
    cell_counts = {}
    for b in adata.obs.batch.unique():
        # Copy so that squidpy writes the graph into a real object, not a view.
        adata_tmp = adata[adata.obs.batch == b].copy()
        celltypes = adata_tmp.obs['cell type'].astype(str).values.astype('<U22')
        cellnames = adata_tmp.obs_names

        sq.gr.spatial_neighbors(adata_tmp,
                                coord_type="generic",
                                spatial_key="spatial",
                                n_neighs=n)

        # Make adjacency matrix symmetric
        connectivities = adata_tmp.obsp['spatial_connectivities']
        connectivities = connectivities.maximum(connectivities.T).tocsr()
        connectivities.eliminate_zeros()

        # Walk the CSR rows instead of densifying the (cells x cells) matrix:
        # row i's neighbours are indices[indptr[i]:indptr[i + 1]].
        indptr, indices = connectivities.indptr, connectivities.indices
        for i, cellname in enumerate(cellnames):
            neighbour_types = celltypes[indices[indptr[i]:indptr[i + 1]]]
            unique, counts = np.unique(neighbour_types, return_counts=True)
            cell_counts[cellname] = dict(zip(unique, counts))

    # Rows were collected batch by batch; realign them to adata.obs_names
    # before storing so that obsm rows match the cells of `adata`.
    adata.obsm[f'k{n}_neighbours_celltype'] = (
        pd.DataFrame(cell_counts).T.reindex(adata.obs_names).fillna(0))

In [None]:
# per cell type
for n in [4, 25, 50]:
    # Long-format table of neighbour counts, grouped by the cell's own type.
    leiden_col_key = 'grouping_variable'
    count_col = f'n NN/{n}'
    dt = adata.obsm[f'k{n}_neighbours_celltype'].copy()
    dt[leiden_col_key] = adata.obs['cell type'].copy()
    dt[leiden_col_key] = dt[leiden_col_key].cat.remove_unused_categories()
    dt = pd.melt(
        dt.reset_index(),
        id_vars=['index', leiden_col_key],
        value_name=count_col,
        var_name='cell type',
    )
    dt['cell type'] = pd.Categorical(dt['cell type'], categories=adata.obs['cell type'].cat.categories)

    # Keep neighbour cell types whose largest per-group mean count is above zero.
    group_means = dt.groupby(['cell type', leiden_col_key])[count_col].mean()
    include = group_means.groupby(['cell type']).max() > n*0.
    rcParams['figure.figsize'] = (adata.obs['cell type'].nunique()*2, 4)
    dt = dt[dt['cell type'].isin(include[include].index.values)]
    dt['cell type'] = dt['cell type'].cat.remove_unused_categories()

    # plot
    boxplot_ax = sns.boxplot(
        data=dt,
        x=leiden_col_key,
        y=count_col,
        hue='cell type',
        palette=adata.uns[f'cell type_colors'][include.values],
        showfliers=False,
    )
    boxplot_ax.set(title='')
    plt.xticks(rotation=0)
    plt.xlabel("", fontsize=17.5)
    plt.ylabel(f'neighborhood composition', fontsize=17.5)
    plt.legend(frameon=False, loc='center left', bbox_to_anchor=(1, 0.7), fontsize=15)
    plt.tick_params(bottom=False, labelsize=17.5)
    sns.despine(offset=10, trim=True, bottom=True)
    plt.savefig(f"{figure_path}/spatial_{n}_cell_type.pdf", bbox_inches="tight", format='pdf')
    plt.show()
    rcParams['figure.figsize'] = (4, 3)


In [None]:
# in tumor niches
for n in [4, 25, 50]:
    # Long-format table of neighbour counts for tumor cells located in niches
    # whose label contains 'umor', grouped by ordered Leiden cluster.
    leiden_col_key = 'leidenOrd'
    count_col = f'n NN/{n}'
    dt = adata.obsm[f'k{n}_neighbours_celltype'].copy()
    dt['leidenOrd'] = adata.obs[leiden_col_key].copy()
    is_tumor_cell = adata.obs['cell type'] == 'tumor'
    in_tumor_niche = adata.obs.niche.str.contains('umor')
    dt = dt[is_tumor_cell & in_tumor_niche]
    dt[leiden_col_key] = dt[leiden_col_key].cat.remove_unused_categories()
    dt = pd.melt(
        dt.reset_index(),
        id_vars=['index', leiden_col_key],
        value_name=count_col,
        var_name='cell type',
    )
    dt['cell type'] = pd.Categorical(dt['cell type'], categories=adata.obs['cell type'].cat.categories)

    # Keep neighbour cell types whose largest per-cluster mean count is above zero.
    cluster_means = dt.groupby(['cell type', 'leidenOrd'])[count_col].mean()
    include = cluster_means.groupby(['cell type']).max() > n*0
    rcParams['figure.figsize'] = (adata.obs[leiden_col_key].nunique()*2, 4)
    dt = dt[dt['cell type'].isin(include[include].index.values)]
    dt['cell type'] = dt['cell type'].cat.remove_unused_categories()

    # plot
    boxplot_ax = sns.boxplot(
        data=dt,
        x=leiden_col_key,
        y=count_col,
        hue='cell type',
        palette=adata.uns[f'cell type_colors'][include.values],
        showfliers=False,
    )
    boxplot_ax.set(title='')
    plt.xticks(rotation=0)
    plt.xlabel("", fontsize=17.5)
    plt.ylabel(f'neighborhood composition', fontsize=17.5)
    plt.legend(frameon=False, loc='center left', bbox_to_anchor=(1, 0.7), fontsize=15)
    plt.tick_params(bottom=False, labelsize=17.5)
    sns.despine(offset=10, trim=True, bottom=True)
    plt.savefig(f"{figure_path}/spatial_{n}_leiden.pdf", bbox_inches="tight", format='pdf')
    plt.show()
    rcParams['figure.figsize'] = (4, 3)

## Compare GEX

In [None]:
# Rank genes per niche and show the top five per group as a dot plot,
# temporarily using a larger font and resolution.
sc.tl.rank_genes_groups(adata, groupby='niche')
sc.set_figure_params(fontsize=20, dpi=200)
sc.pl.rank_genes_groups_dotplot(
    adata,
    n_genes=5,
)
sc.set_figure_params(fontsize=12, dpi=100)

In [None]:
# only tumor: restrict to niches whose label contains 'umor' and rank genes among them
tumor_niches = [niche for niche in adata.obs['niche'].unique() if 'umor' in niche]
x = adata[adata.obs['niche'].isin(tumor_niches)]
sc.tl.rank_genes_groups(x, groupby='niche')
rcParams['figure.figsize'] = (8, 12)
sc.pl.rank_genes_groups_dotplot(
    x,
    n_genes=5,
    dendrogram=False,
)
rcParams['figure.figsize'] = (4, 3)

In [None]:
# For every cluster group stored under 'leidenOrd', rank genes restricted to
# that group's clusters and plot the top 25 genes.
leiden_ord_groups = adata.uns['cluster_groups']['leidenOrd']
for structure in leiden_ord_groups:
    clusters = leiden_ord_groups[structure]
    print(structure)
    sc.tl.rank_genes_groups(adata, 'leidenOrd', groups=clusters)
    sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)

In [None]:
# check important tumor markers
tumor_marker_genes = [
    'CD24', 'CD44', 'SOX9', 'SFN', 'SLPI', 'OLFM4', 'SOX4', 'MIF',
    'AKT1', 'FGFR1', 'EGFR', 'KRT5', 'CD9', 'TYK2', 'CDH1', 'NDRG1',
    'VEGFA', 'IGFBP5', 'DUSP5', 'MMP7', 'ITGB4', 'EPCAM', 'SERPINA1', 'WIF1',
]
sc.pl.umap(
    adata,
    color=tumor_marker_genes,
    ncols=8,
    wspace=0.3,
    size=0.5,
    frameon=False,
    color_map='viridis',
)

In [None]:
# check important tumor markers, listed in three groups (gf, ras, apoptosis)
gf = [
    'EGF', 'EGFR', 'ERBB2', 'TGFB1',
    'IGF1', 'IGF1R', 'IGF2', 'IGF2R', 'IGFBP3', 'IGFBP5', 'IGFBP6', 'IGFBP7',
    'PDGFA', 'PDGFB', 'PDGFC', 'PDGFD', 'PDGFRA', 'PDGFRB',
    'HIF1A', 'MAPK13', 'MAPK14',
]
ras = ['KRAS', 'MYC']
apoptosis = ['TP53', 'BCL2', 'BAX', 'BCL2L1']
markers = [*gf, *ras, *apoptosis]
sc.pl.umap(
    adata,
    color=markers,
    ncols=8,
    wspace=0.3,
    size=0.5,
    color_map='viridis',
    frameon=False,
)

## Compare GPs
computed with the scripts/differential_gp.py script

In [None]:
# Set the default matplotlib figure size used by the following plots.
rcParams['figure.figsize'] = (4, 3)

In [None]:
# Display the label encoders stored in adata.uns for the GP target and source categories.
adata.uns['nichecompass_targets_categories_label_encoder'], adata.uns['nichecompass_sources_categories_label_encoder']

### Tumor niches

In [None]:
# Load every tumor-cluster GP test result file, stack them into one table and
# drop gene programs whose name contains 'Add-on'.
files = glob.glob(f"{base_path}/artifacts/nanostring_cosmx_human_nsclc/results/reference/19102023_172844_43/gpTest_tumor_clusters_*_r0.45.csv")
significant_gps = pd.concat(
    [pd.read_csv(path, index_col=0) for path in files],
    axis=0,
)
is_addon = significant_gps.gene_program.str.contains('Add-on')
significant_gps = significant_gps[~is_addon]
print(len(significant_gps))
significant_gps.head()

In [None]:
interest_gps = significant_gps.gene_program.values
invert = []

# overwrite to selected GPs
interest_gps = [
    'EFNB2_ligand_receptor_GP',
    'Ghrl_ligand_receptor_target_gene_GP',
    'CXCL1_ligand_receptor_GP',
    'CEACAM6_ligand_receptor_GP',
    'CXCL5_ligand_receptor_GP',
    'Mmp7_ligand_receptor_target_gene_GP',
]
invert = [
    'EFNB2_ligand_receptor_GP',
    'Ghrl_ligand_receptor_target_gene_GP',
    'CXCL1_ligand_receptor_GP',
    'CXCL5_ligand_receptor_GP',
    'Mmp7_ligand_receptor_target_gene_GP',
]

# Build a per-cell table with the latent columns of the selected active GPs.
active_gp_names = adata.uns['nichecompass_active_gp_names']
is_interest_gp = pd.Series(active_gp_names).isin(interest_gps).values
interest_gp_df = pd.DataFrame(
    adata.obsm['nichecompass_latent'][:, is_interest_gp],
    columns=active_gp_names[is_interest_gp],
)
interest_gp_df = interest_gp_df.set_index(adata.obs.index)

# Drop columns that already exist in `adata.obs` and flip the sign where appropriate.
for gp in interest_gps:
    if gp in adata.obs:
        adata.obs.drop(columns=gp, inplace=True)

    if gp in invert:
        interest_gp_df[gp] = -interest_gp_df[gp]

# Append the GP columns to `adata.obs`.
adata.obs = pd.concat([adata.obs, interest_gp_df], axis=1)

In [None]:
# For each selected GP: plot its score on the UMAP, then the expression of the
# genes flagged in its source / target category columns.
# NOTE(review): the meaning of category codes 1, 2, 3 comes from the label
# encoders in adata.uns — confirm against them.
for gp in interest_gps:
    gp_idx = adata.uns['nichecompass_gp_names'].tolist().index(gp)
    source_codes = adata.varm['nichecompass_gp_sources_categories'][:, gp_idx]
    target_codes = adata.varm['nichecompass_gp_targets_categories'][:, gp_idx]
    source = adata.var_names[(source_codes == 1) | (source_codes == 2)].tolist()
    targets = adata.var_names[(target_codes == 3) | (target_codes == 2)].tolist()
    sc.pl.umap(adata, color=gp, ncols=5, size=0.5, frameon=False, cmap='RdGy_r')
    gp_genes = source + targets
    if len(gp_genes) > 0:
        sc.pl.umap(adata, color=gp_genes, ncols=5, size=0.5, frameon=False, cmap='RdPu')

In [None]:
# Show the CEACAM6 GP score and three related genes, first in the spatial
# coordinates of batch 'lung5_rep1', then on the UMAP of all cells.
ceacam6_features = ['CEACAM6_ligand_receptor_GP', 'PTK2', 'SRC', 'CEACAM6']
sc.pl.embedding(
    adata[adata.obs.batch == 'lung5_rep1'],
    basis="spatial",
    color=ceacam6_features,
    size=6,
    frameon=False,
    color_map='viridis',
)
sc.pl.umap(
    adata,
    color=ceacam6_features,
    size=1,
    frameon=False,
    color_map='viridis',
)

In [None]:
rcParams['figure.figsize'] = (8, 6)

# Shorten GP names (longest pattern first) in both the obs columns and the GP list.
gp_abbreviations = [
    ('ligand_receptor_target_gene', 'LRT'),
    ('ligand_receptor', 'LR'),
]
obs_columns = adata.obs.columns
interest_gps = pd.Series(interest_gps)
for long_name, short_name in gp_abbreviations:
    obs_columns = obs_columns.str.replace(long_name, short_name)
    interest_gps = interest_gps.str.replace(long_name, short_name)
adata.obs.columns = obs_columns

# Matrix plot of the selected GPs per niche, restricted to niches containing 'umor'.
tumor_niche_cells = adata[adata.obs.niche.str.contains('umor')]
sc.pl.MatrixPlot(
    tumor_niche_cells,
    interest_gps,
    groupby='niche',
    cmap='viridis',
    standard_scale='var',
).show()
rcParams['figure.figsize'] = (4, 3)

### Tumor niches donor 9

In [None]:
# Load the GP test results of file 'gpTest_tumor_clusters_s9_0_r0.45.csv' and
# drop gene programs whose name contains 'Add-on'.
significant_gps = pd.read_csv(
    f"{base_path}/artifacts/nanostring_cosmx_human_nsclc/results/reference/19102023_172844_43/gpTest_tumor_clusters_s9_0_r0.45.csv"
)
is_addon = significant_gps.gene_program.str.contains('Add-on')
significant_gps = significant_gps[~is_addon]

In [None]:
interest_gps = significant_gps.gene_program.values
invert = []

# Build a per-cell table with the latent columns of the selected active GPs.
active_gp_names = adata.uns['nichecompass_active_gp_names']
is_interest_gp = pd.Series(active_gp_names).isin(interest_gps).values
interest_gp_df = pd.DataFrame(
    adata.obsm['nichecompass_latent'][:, is_interest_gp],
    columns=active_gp_names[is_interest_gp],
)
interest_gp_df = interest_gp_df.set_index(adata.obs.index)

# Drop columns that already exist in `adata.obs` and flip the sign where appropriate.
for gp in interest_gps:
    if gp in adata.obs:
        adata.obs.drop(columns=gp, inplace=True)

    if gp in invert:
        interest_gp_df[gp] = -interest_gp_df[gp]

# Append the GP columns to `adata.obs`.
adata.obs = pd.concat([adata.obs, interest_gp_df], axis=1)

In [None]:
# not executed to reduce notebook size
# For each selected GP: plot its score on the UMAP, then the expression of the
# genes flagged in its source / target category columns.
for gp in interest_gps:
    gp_idx = adata.uns['nichecompass_gp_names'].tolist().index(gp)
    source_codes = adata.varm['nichecompass_gp_sources_categories'][:, gp_idx]
    target_codes = adata.varm['nichecompass_gp_targets_categories'][:, gp_idx]
    source = adata.var_names[(source_codes == 1) | (source_codes == 2)].tolist()
    targets = adata.var_names[(target_codes == 3) | (target_codes == 2)].tolist()
    sc.pl.umap(adata, color=gp, ncols=5, size=0.5, frameon=False, cmap='RdGy_r')
    gp_genes = source + targets
    if len(gp_genes) > 0:
        sc.pl.umap(adata, color=gp_genes, ncols=5, size=0.5, frameon=False, cmap='RdPu')

### Stroma niches

In [None]:
# Load every stroma-cluster GP test result file, stack them into one table and
# drop gene programs whose name contains 'Add-on'.
files = glob.glob(f"{base_path}/artifacts/nanostring_cosmx_human_nsclc/results/reference/19102023_172844_43/gpTest_stroma_clusters_*_r0.45.csv")
significant_gps = pd.concat(
    [pd.read_csv(path, index_col=0) for path in files],
    axis=0,
)
is_addon = significant_gps.gene_program.str.contains('Add-on')
significant_gps = significant_gps[~is_addon]
print(len(significant_gps))
significant_gps.head()

In [None]:
interest_gps = significant_gps.gene_program.values
invert = []

# overwrite to selected GPs
interest_gps = [
    'CD59_ligand_receptor_GP',
    'Ins1_ligand_receptor_target_gene_GP',
    'L-Glutamine_metabolite_enzyme_sensor_GP',
    'Sbpl_ligand_receptor_target_gene_GP',
]

# Build a per-cell table with the latent columns of the selected active GPs.
active_gp_names = adata.uns['nichecompass_active_gp_names']
is_interest_gp = pd.Series(active_gp_names).isin(interest_gps).values
interest_gp_df = pd.DataFrame(
    adata.obsm['nichecompass_latent'][:, is_interest_gp],
    columns=active_gp_names[is_interest_gp],
)
interest_gp_df = interest_gp_df.set_index(adata.obs.index)

# Drop columns that already exist in `adata.obs` and flip the sign where appropriate.
for gp in interest_gps:
    if gp in adata.obs:
        adata.obs.drop(columns=gp, inplace=True)

    if gp in invert:
        interest_gp_df[gp] = -interest_gp_df[gp]

# Append the GP columns to `adata.obs`.
adata.obs = pd.concat([adata.obs, interest_gp_df], axis=1)

In [None]:
# For each selected GP: plot its score on the UMAP, then the expression of the
# genes flagged in its source / target category columns.
for gp in interest_gps:
    gp_idx = adata.uns['nichecompass_gp_names'].tolist().index(gp)
    source_codes = adata.varm['nichecompass_gp_sources_categories'][:, gp_idx]
    target_codes = adata.varm['nichecompass_gp_targets_categories'][:, gp_idx]
    source = adata.var_names[(source_codes == 1) | (source_codes == 2)].tolist()
    targets = adata.var_names[(target_codes == 3) | (target_codes == 2)].tolist()
    sc.pl.umap(adata, color=gp, ncols=5, size=0.5, frameon=False, cmap='RdGy_r')
    gp_genes = source + targets
    if len(gp_genes) > 0:
        sc.pl.umap(adata, color=gp_genes, ncols=5, size=0.5, frameon=False, cmap='RdPu')

In [None]:
rcParams['figure.figsize'] = (8, 6)

# Shorten GP names (longest pattern first) in both the obs columns and the GP list.
gp_abbreviations = [
    ('ligand_receptor_target_gene', 'LRT'),
    ('ligand_receptor', 'LR'),
    ('metabolite_enzyme_sensor', 'MES'),
]
obs_columns = adata.obs.columns
interest_gps = pd.Series(interest_gps)
for long_name, short_name in gp_abbreviations:
    obs_columns = obs_columns.str.replace(long_name, short_name)
    interest_gps = interest_gps.str.replace(long_name, short_name)
adata.obs.columns = obs_columns

# Matrix plot of the selected GPs per niche, restricted to niches NOT containing 'umor'.
non_tumor_niche_cells = adata[~adata.obs.niche.str.contains('umor')]
sc.pl.MatrixPlot(
    non_tumor_niche_cells,
    interest_gps,
    groupby='niche',
    cmap='viridis',
    standard_scale='var',
).show()
rcParams['figure.figsize'] = (4, 3)

In [None]:
# selected genes
selected_genes = ['CXCR4', 'GLUL', 'CD59', 'INSR']
sc.pl.umap(
    adata,
    color=selected_genes,
    ncols=2,
    size=2.5,
    legend_fontsize='x-large',
    show=True,
    frameon=False,
    cmap='PuRd',
)

### Neutrophil niches
infiltrative vs expansions

In [None]:
# Load the GP test results of file 'gpTest_neutrophil_clusters_4_r0.45.csv'.
# Keep the result as a DataFrame: both the 'Add-on' filter below and the next
# cell access its `gene_program` column, which a plain list does not have.
significant_gps = pd.read_csv(f"{base_path}/artifacts/nanostring_cosmx_human_nsclc/results/reference/19102023_172844_43/gpTest_neutrophil_clusters_4_r0.45.csv")
# Drop gene programs whose name contains 'Add-on'.
significant_gps = significant_gps[~significant_gps.gene_program.str.contains('Add-on')]

In [None]:
interest_gps = significant_gps.gene_program.values
invert = []

# Build a per-cell table with the latent columns of the selected active GPs.
active_gp_names = adata.uns['nichecompass_active_gp_names']
is_interest_gp = pd.Series(active_gp_names).isin(interest_gps).values
interest_gp_df = pd.DataFrame(
    adata.obsm['nichecompass_latent'][:, is_interest_gp],
    columns=active_gp_names[is_interest_gp],
)
interest_gp_df = interest_gp_df.set_index(adata.obs.index)

# Drop columns that already exist in `adata.obs` and flip the sign where appropriate.
for gp in interest_gps:
    if gp in adata.obs:
        adata.obs.drop(columns=gp, inplace=True)

    if gp in invert:
        interest_gp_df[gp] = -interest_gp_df[gp]

# Append the GP columns to `adata.obs`.
adata.obs = pd.concat([adata.obs, interest_gp_df], axis=1)

In [None]:
# not executed to reduce notebook size
# For each selected GP: plot its score on the UMAP, then the expression of the
# genes flagged in its source / target category columns.
for gp in interest_gps:
    gp_idx = adata.uns['nichecompass_gp_names'].tolist().index(gp)
    source_codes = adata.varm['nichecompass_gp_sources_categories'][:, gp_idx]
    target_codes = adata.varm['nichecompass_gp_targets_categories'][:, gp_idx]
    source = adata.var_names[(source_codes == 1) | (source_codes == 2)].tolist()
    targets = adata.var_names[(target_codes == 3) | (target_codes == 2)].tolist()
    sc.pl.umap(adata, color=gp, ncols=5, size=0.5, frameon=False, cmap='RdGy_r')
    gp_genes = source + targets
    if len(gp_genes) > 0:
        sc.pl.umap(adata, color=gp_genes, ncols=5, size=0.5, frameon=False, cmap='RdPu')

# Reference 43 + donor 5 r3

## Prepare for analysis

In [None]:
# Load the reference + query model output with suffix '43_3' and derive the
# figure folder from the model path.
adata, adata_path = load_adata(
    suffix='43_3',
    model='reference_query_mapping',
)
figure_path = adata_path.replace('models', 'figures')
figure_path = figure_path.replace('/nanostring_cosmx_human_nsclc_reference_query_mapping.h5ad', '')
gc.collect()
print(figure_path)
adata

### Select resolution

In [None]:
# Call the notebook's `plot_latent` helper for the reference-query mapping model.
plot_latent(adata, model='reference_query_mapping')

In [None]:
# sort batch colors: reorder the stored palette by a fixed index permutation
# NOTE(review): the permutation assumes seven batch colours — confirm.
batch_color_order = [0, 1, 6, 2, 3, 4, 5]
current_batch_colors = np.array(adata.uns['batch_colors'])
adata.uns['batch_colors'] = list(current_batch_colors[batch_color_order])
sc.pl.umap(
    adata,
    color=['batch'],
    ncols=4,
    wspace=0.3,
    size=0.5,
)

In [None]:
# Chosen Leiden resolution; passed to the notebook's `res_details` helper.
latent_leiden_resolution=0.35
res_details(adata, resolution=latent_leiden_resolution, model='reference_query_mapping')

In [None]:
# UMAP coloured by mapping entity: reference cells only, then all cells.
sc.set_figure_params(dpi=200, figsize=(4,3))
reference_cells = adata[adata.obs.mapping_entity == 'reference']
for cells in (reference_cells, adata):
    sc.pl.umap(cells, color=['mapping_entity'], ncols=1, wspace=0.3, size=0.5, frameon=False)
sc.set_figure_params(fontsize=12, dpi=100, figsize=(4,3))

In [None]:
# Map Leiden clusters (resolution 0.35) to niche labels.
# Cluster '0' takes '8- Stroma': the previous literal listed key '10' twice, so
# '8- Stroma' was silently overwritten by '12- Macrophage rich stroma' and
# cluster '0' was left unmapped (NaN). This matches the cluster groups defined
# for this resolution ('0' in stroma_clusters, '10' in macrophage_clusters).
leiden2niche = {
    '2': '1- Tumor (stroma border)',
    '4': '2- Tumor interior',
    '8': '3- Tumor (neutrophil border)',
    '5': '4- Tumor interior',
    '11': '5- Infiltrated tumor',
    '7': '6- Neutrophil expansion',
    '6': '7- Myeloid cells rich stroma',
    '0': '8- Stroma',
    '1': '9- Plasmablast rich stroma',
    '9': '10- Plasmablast rich stroma',
    '3': '11- Lymphoid aggregates',
    '10': '12- Macrophage rich stroma',
}

adata.obs['niche'] = adata.obs['latent_leiden_0.35'].map(leiden2niche)

# Build the niche palette from the 'Dark2' colormap; `colorFader` is a helper
# defined elsewhere in the notebook (presumably returns n shades — confirm).
general=np.apply_along_axis(to_hex, 1, np.array(plt.get_cmap('Dark2').colors))
tumor=colorFader(general[2], c2='#FFFFFF', n=5, mix=0)
lymphoid=general[4]
myeloid=general[1]
Blike=colorFader(general[0], c2='#FFFFFF', n=2, mix=0)
neutrophil=colorFader(general[3], c2='#FFFFFF', n=2, mix=0)
stroma=general[5]

# Twelve colours, listed in niche-number order (1- ... 12-).
# NOTE(review): confirm this order matches the category order of the
# 'latent_leiden_0.35' and 'niche' columns that scanpy pairs the colours with.
leiden_col = [tumor[0], tumor[1], tumor[2], tumor[3], tumor[4], neutrophil[0], neutrophil[1], stroma,  Blike[0],  Blike[1], lymphoid, myeloid]

adata.uns['latent_leiden_0.35_colors'] = leiden_col
adata.uns['niche_colors'] = leiden_col

In [None]:
# UMAP coloured by niche on a larger canvas (figure is not shown or saved here).
rcParams['figure.figsize'] = (8, 6)
sc.pl.umap(
    adata,
    color=['niche'],
    ncols=1,
    wspace=1,
    size=2.5,
    legend_fontsize='x-large',
    show=False,
    frameon=False,
    title=[''],
)
# plt.savefig(f"{figure_path}/d1.pdf", bbox_inches="tight", format='pdf')
rcParams['figure.figsize'] = (4, 3)

In [None]:
# Percentage of each batch within every Leiden cluster, drawn as stacked bars.
cluster_var = 'batch'
plot_var = 'latent_leiden_0.35'
props = (
    adata.obs
    .groupby([cluster_var, plot_var])
    .size()
    .reset_index()
    .pivot(columns=plot_var, index=cluster_var)
    .T
)
props.index = props.index.droplevel(0)
props = props.fillna(0)
row_totals = props.sum(axis=1)
props = props.div(row_totals, axis=0)*100
fig = plot_cluster_proportions(
    props,
    xlabel_rotation=90,
    cluster_palette=adata.uns[f'{cluster_var}_colors'],
    figsize=(8,3),
)
# plt.savefig(f"{figure_path}/d3_prop.pdf", bbox_inches="tight", format='pdf')


In [None]:
# Create the container only when it is missing, so cluster groups already
# stored for other keys are kept (the previous check used the misspelled key
# 'cluster_grups' and therefore reset the dict on every run).
if 'cluster_groups' not in adata.uns:
    adata.uns['cluster_groups'] = {}

# Leiden clusters belonging to each structure at the selected resolution.
adata.uns['cluster_groups'][f"latent_leiden_{latent_leiden_resolution}"] = {
    'tumor_clusters': ['2','4','5','8','11'],
    'stroma_clusters': ['0','1'],
    'neutrophil_clusters': ['6','7'],
    'macrophage_clusters': ['10'],
    'lymphoid_clusters': ['3','9']
}

In [None]:
# Write the annotated object back to the path it was loaded from, then free memory.
adata.write_h5ad(adata_path)
gc.collect()

## Compute integration metric

In [None]:
from sklearn.metrics import silhouette_score

# Silhouette score of the mapping entity labels in the NicheCompass latent
# space, computed on the cells of patient 'Lung5'.
is_lung5 = adata.obs.patient == 'Lung5'
nc_silouette = silhouette_score(
    X=adata[is_lung5].obsm['nichecompass_latent'],
    labels=adata[is_lung5].obs['mapping_entity'],
)
print(nc_silouette)

## Label Transfer

In [None]:
# Drop the current object and collect garbage before loading the next one.
del adata
gc.collect()

In [None]:
# Settings for the label-transfer comparison.
model_label = '19102023_172844_43_3'
batch_key = 'fov'
latent_key = 'X_pca'
within_sample = True

dataset = 'nanostring_cosmx_human_nsclc'
model_type = 'reference_query_mapping'
cell_type_key='cell_type'

# load model
adata = sc.read_h5ad(f'/lustre/groups/imm01/workspace/irene.bonafonte/Projects/2023May_nichecompass/nichecompass-reproducibility/artifacts/{dataset}/models/{model_type}/{model_label}/{dataset}_{model_type}.h5ad')
adata.obs_names = adata.obs['cell_ID'].values

# Collapse the original cell type annotation into coarser groups.
coarse_cell_types = {
    'tumor': ['tumor 9', 'tumor 6', 'tumor 5', 'tumor 13', 'tumor 12'],
    'fibroblast': ['fibroblast'],
    'neutrophil': ['neutrophil'],
    'plasmablast': ['plasmablast'],
    'B-cell': ['B-cell'],
    'endothelial': ['endothelial'],
    'epithelial': ['epithelial'],
    'T cell/NK': ['T CD4 naive', 'T CD4 memory', 'T CD8 naive', 'T CD8 memory', 'Treg', 'NK'],
    'DC/monocyte': ['mDC', 'pDC', 'monocyte'],
    'macrophage': ['macrophage'],
    'mast': ['mast'],
}
original_to_coarse = {
    original: coarse
    for coarse, originals in coarse_cell_types.items()
    for original in originals
}
adata.obs[cell_type_key] = adata.obs['cell_type_original'].map(original_to_coarse).astype("category")


# keep only those from the same replicate as the query
if within_sample:
    is_query = adata.obs.mapping_entity == 'query'
    sample_name = adata.obs.loc[is_query, 'patient'].unique()[0]
    is_same_patient = adata.obs.patient == sample_name
    batches = adata.obs.loc[is_same_patient, 'batch'].unique()
    if len(batches) > 1:
        adata = adata[is_same_patient]

In [None]:
# When PCA is the chosen latent space, scale the data and recompute the PCA;
# then build the neighbour graph and the UMAP.
if latent_key == 'X_pca':
    sc.pp.scale(adata)
    sc.tl.pca(adata, svd_solver='arpack')
# NOTE(review): `latent_key` is not passed to sc.pp.neighbors (no `use_rep`), so
# another latent_key would not change the representation used here — confirm intent.
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)
sc.tl.umap(adata)

In [None]:
# UMAP coloured by cell type, batch and mapping entity.
sc.pl.umap(adata, color=['cell type', 'batch','mapping_entity'], ncols=3, wspace=0.5)

In [None]:
# Label only the cells of three selected fields of view (first FOV of each
# lung5 replicate) with their FOV name; all other cells stay NaN.
adata.obs['fovbatch'] = np.nan
for fov_name in ('lung5_rep1_1', 'lung5_rep2_1', 'lung5_rep3_1'):
    in_fov = adata.obs.fov == fov_name
    adata.obs.loc[in_fov, 'fovbatch'] = adata.obs.loc[in_fov, 'fov']
sc.pl.umap(
    adata,
    color=['fovbatch'],
    size=1.5,
    frameon=False,
)


In [None]:
# Reload the stored model output and, if requested, keep only the cells of the
# query's patient when that patient has more than one batch.
adata = sc.read_h5ad(f'/lustre/groups/imm01/workspace/irene.bonafonte/Projects/2023May_nichecompass/nichecompass-reproducibility/artifacts/{dataset}/models/{model_type}/{model_label}/{dataset}_{model_type}.h5ad')
if within_sample:
    is_query = adata.obs.mapping_entity == 'query'
    sample_name = adata.obs.loc[is_query, 'patient'].unique()[0]
    is_same_patient = adata.obs.patient == sample_name
    batches = adata.obs.loc[is_same_patient, 'batch'].unique()
    if len(batches) > 1:
        adata = adata[is_same_patient]
adata

In [None]:
# UMAP coloured by cell type, batch and mapping entity.
sc.pl.umap(adata, color=['cell type', 'batch','mapping_entity'], ncols=3, wspace=0.5)

In [None]:
# Label only the cells of three selected fields of view (first FOV of each
# lung5 replicate) with their FOV name; all other cells stay NaN.
adata.obs['fovbatch'] = np.nan
for fov_name in ('lung5_rep1_1', 'lung5_rep2_1', 'lung5_rep3_1'):
    in_fov = adata.obs.fov == fov_name
    adata.obs.loc[in_fov, 'fovbatch'] = adata.obs.loc[in_fov, 'fov']
sc.pl.umap(
    adata,
    color=['fovbatch'],
    size=1.5,
    frameon=False,
)

In [None]:
from sklearn.metrics import silhouette_score

# Silhouette score of the mapping entity labels for patient 'Lung5', first in
# the NicheCompass latent space and then in PCA space.
is_lung5 = adata.obs.patient == 'Lung5'
nc_silouette = silhouette_score(
    X=adata[is_lung5].obsm['nichecompass_latent'],
    labels=adata[is_lung5].obs['mapping_entity'],
)
print(nc_silouette)
pca_silouette = silhouette_score(
    X=adata[is_lung5].obsm['X_pca'],
    labels=adata[is_lung5].obs['mapping_entity'],
)
print(pca_silouette)

In [None]:
from nichecompass.models import NicheCompass

# Load the trained reference-query model only to read its active gene
# programs, then release it again.
query_model_dir = f'{base_path}/artifacts/nanostring_cosmx_human_nsclc/models/reference_query_mapping/19102023_172844_43_3/'
model = NicheCompass.load(
    dir_path=query_model_dir,
    adata=None,
    adata_file_name='nanostring_cosmx_human_nsclc_reference_query_mapping.h5ad',
    gp_names_key='nichecompass_gp_names',
)
query_active_gps = model.get_active_gps()

del model
gc.collect()

In [None]:
# Load the reference model output and annotate its niches at two resolutions.
adata_ref, adata_path = load_adata(suffix='43', model='reference')
leiden2niche = {
    '0': '1- Tumor (stroma border)',
    '2': '2- Tumor interior',
    '5': '3- Tumor (neutrophil border)',
    '7': '4- Tumor interior',
    '11': '5- Infiltrated tumor',
    '3': '6- Neutrophil expansion',
    '4': '7- Myeloid cells rich stroma',
    '1': '8- Stroma',
    '6': '9- Plasmablast rich stroma',
    '9': '10- Plasmablast rich stroma',
    '8': '11- Lymphoid aggregates',
    '10': '12- Macrophage rich stroma',
}
coarse_niches = {
    '0': 'lymphoid stroma',
    '1': 'tumor 9,12',
    '2': 'myeloid stroma',
    '3': 'tumor 6',
    '4': 'tumor 5',
}
coarse_niches_pooled_tumor = {
    '0': 'lymphoid stroma',
    '1': 'tumor',
    '2': 'myeloid stroma',
    '3': 'tumor',
    '4': 'tumor',
}

adata_ref.obs['niche'] = adata_ref.obs['latent_leiden_0.45'].map(leiden2niche)
adata_ref.obs['niche_0.1'] = adata_ref.obs['latent_leiden_0.1'].map(coarse_niches)
adata_ref.obs['niche_0.1lo'] = adata_ref.obs['latent_leiden_0.1'].map(coarse_niches_pooled_tumor)

# Palette built from the 'Dark2' colormap; `colorFader` is a helper defined
# elsewhere in the notebook.
general=np.apply_along_axis(to_hex, 1, np.array(plt.get_cmap('Dark2').colors))
tumor=colorFader(general[2], c2='#FFFFFF', n=5, mix=0)
lymphoid=general[4]
myeloid=general[1]
Blike=colorFader(general[0], c2='#FFFFFF', n=2, mix=0)
neutrophil=colorFader(general[3], c2='#FFFFFF', n=2, mix=0)
stroma=general[5]


# Colour per Leiden cluster, listed in niche-number order.
leiden_colDict = {
    '0': tumor[0],
    '2': tumor[1],
    '5': tumor[2],
    '7': tumor[3],
    '11': tumor[4],
    '3': neutrophil[0],
    '4': neutrophil[1],
    '1': stroma,
    '6': Blike[0],
    '9': Blike[1],
    '8': lymphoid,
    '10': myeloid,
}
for colors_key in ('latent_leiden_0.45_colors', 'leidenOrd_colors', 'niche_colors'):
    adata_ref.uns[colors_key] = list(leiden_colDict.values())

# Niche label -> colour, pairing the niche categories with the stored palette by position.
colDict = dict(zip(adata_ref.obs.niche.cat.categories, adata_ref.uns['niche_colors']))

adata_ref

In [None]:
# Keep only the reference latent columns of GPs that are active in both the
# reference and the query model.
# NOTE(review): columns keep the reference GP order; presumably the shared GPs
# appear in the same relative order in `query_active_gps` — confirm.
ref_active_gps = adata_ref.uns['nichecompass_active_gp_names']
ref_obs = adata_ref.obs
gps = list(set(ref_active_gps) & set(query_active_gps))
is_shared_gp = pd.Series(ref_active_gps).isin(gps)
adata_ref.obsm['nichecompass_latent'] = adata_ref.obsm['nichecompass_latent'][:, is_shared_gp]
gc.collect()

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Distance-weighted 50-NN classifier fitted on the reference latent space, with
# the three niche annotations as (multi-output) targets.
niche_label_columns = ['niche', 'niche_0.1', 'niche_0.1lo']
knn = KNeighborsClassifier(n_neighbors=50, weights='distance')
knn.fit(
    X=adata_ref.obsm['nichecompass_latent'],
    y=adata_ref.obs[niche_label_columns],
)

In [None]:
# The reference object is no longer needed once the classifier is fitted.
del adata_ref
gc.collect()

In [None]:
# Load the reference + query object, keep the query cells only and restrict
# their latent space to the GPs shared with the reference.
# NOTE(review): columns keep the query GP order; presumably this matches the
# column order used when fitting the classifier — confirm.
adata_query, adata_path = load_adata(
    suffix='43_3',
    model='reference_query_mapping',
)
adata_query.obs['mapping_entity'].value_counts()
is_query = adata_query.obs.mapping_entity == 'query'
adata_query = adata_query[is_query]
is_shared_gp = pd.Series(query_active_gps).isin(gps)
adata_query.obsm['nichecompass_latent'] = adata_query.obsm['nichecompass_latent'][:, is_shared_gp]
gc.collect()

In [None]:
# Class probabilities for the query cells; indexed per target column below (proba[0]).
proba = knn.predict_proba(adata_query.obsm['nichecompass_latent'])

In [None]:
# Distances to, and indices of, the 50 nearest reference cells of every query cell.
k_dist, k_indx = knn.kneighbors(adata_query.obsm['nichecompass_latent'], n_neighbors=50, return_distance=True)

In [None]:
# Summarise the prediction for the first target ('niche'): most probable
# label, its probability, the mean neighbour distance and the distance to the
# 50th neighbour.
predictions = proba[0]
predictions = pd.DataFrame({
    'predlabel': np.argmax(predictions, axis=1),
    'probability': np.max(predictions, axis=1),
    'mean_dist': np.mean(k_dist, axis=1),
    'k_dist': k_dist[:,49],
})
class_index_to_label = dict(enumerate(knn.classes_[0]))
predictions['predlabel'] = predictions['predlabel'].map(class_index_to_label)
predictions.index = adata_query.obs.index
predictions

In [None]:
# Predictions were saved once with the commented-out line; here they are read
# back from that CSV instead of being recomputed.
# predictions.to_csv(f'{base_path}/artifacts/nanostring_cosmx_human_nsclc/results/reference_query_mapping/19102023_172844_43_3/label_transfer.csv')
predictions = pd.read_csv(f'{base_path}/artifacts/nanostring_cosmx_human_nsclc/results/reference_query_mapping/19102023_172844_43_3/label_transfer.csv', index_col=0)

In [None]:
# Histogram of the prediction probabilities.
predictions.probability.hist()

In [None]:
# Counts of cells with probability below / not below 0.7, divided by a fixed total.
# NOTE(review): the denominator is hard-coded (74045+17646); presumably the
# number of rows in `predictions` — confirm, or divide by predictions.shape[0].
(predictions.probability < 0.7).value_counts()/(74045+17646)

In [None]:
# Fraction of query cells assigned to each predicted label.
predictions['predlabel'].value_counts()/predictions.shape[0]

In [None]:
# Predicted labels that cover more than 5% of the query cells.
label_counts = predictions['predlabel'].value_counts()
label_fractions = label_counts/predictions.shape[0]
keep = label_counts.index[label_fractions > 0.05]
keep

In [None]:
# Append the prediction columns to the query cell annotations.
adata_query.obs = pd.concat([adata_query.obs, predictions], axis=1)

In [None]:
# Use the reference niche labels (keys of colDict) as categories of the
# predicted label and store the matching colours for plotting.
niche_labels = colDict.keys()
adata_query.obs['predlabel'] = pd.Categorical(adata_query.obs['predlabel'], categories=niche_labels)
adata_query.uns['predlabel_colors'] = [
    colDict[label] for label in adata_query.obs['predlabel'].cat.categories
]

In [None]:
# Keep only the query cells whose predicted label is in `keep`.
adata_query = adata_query[adata_query.obs.predlabel.isin(keep)]

In [None]:
# 2x2 panel of query UMAPs: mean distance, k-th distance, probability and
# predicted label, each with its own colour limits.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12,6))
umap_panels = [
    ('mean_dist', axes[0,0], {'vmax': 170, 'vmin': 50}),
    ('k_dist', axes[0,1], {'vmax': 170, 'vmin': 50}),
    ('probability', axes[1,0], {'vmax': 1, 'vmin': 0}),
    ('predlabel', axes[1,1], {}),
]
for panel_color, panel_ax, color_limits in umap_panels:
    ax = sc.pl.umap(
        adata_query,
        color=[panel_color],
        size=1.5,
        frameon=False,
        ncols=1,
        wspace=3,
        ax=panel_ax,
        show=False,
        **color_limits,
    )

fig.tight_layout()

In [None]:
# Higher-resolution query UMAPs: cell type, predicted label and probability.
sc.set_figure_params(dpi=200, figsize=(6,4))
shared_umap_kwargs = {'frameon': False, 'ncols': 1, 'wspace': 3}
sc.pl.umap(adata_query, color=['cell type'], size=2, **shared_umap_kwargs)
sc.pl.umap(adata_query, color=['predlabel'], size=1.5, **shared_umap_kwargs)
sc.pl.umap(adata_query, color=['probability'], size=1.5, vmax=1, vmin=0, **shared_umap_kwargs)
sc.set_figure_params(dpi=80, figsize=(4,3))


In [None]:
# NOTE(review): `cluster_var` is NOT set in this cell (the assignment
# `cluster_var = 'cell type'` is commented out), so the value from an earlier
# cell is used — confirm that this is intended.
# Counts of `cluster_var` groups within each predicted label, as stacked bars.
plot_var = 'predlabel'
props = (
    adata_query.obs
    .groupby([cluster_var, plot_var])
    .size()
    .reset_index()
    .pivot(columns=plot_var, index=cluster_var)
    .T
)
props.index = props.index.droplevel(0)
props = props.fillna(0)
# props = props.div(props.sum(axis=1), axis=0)*100
fig = plot_cluster_proportions(
    props,
    xlabel_rotation=90,
    cluster_palette=adata_query.uns[f'{cluster_var}_colors'],
    figsize=(8,3),
)

In [None]:
# Prediction probability per predicted niche (outliers hidden).
rcParams['figure.figsize'] = (10, 4)
sns.boxplot(
    data=adata_query.obs,
    x='predlabel',
    y='probability',
    showfliers=False,
)
sns.despine(offset=10, trim=True, bottom=True)
plt.xticks(rotation=90)
plt.xlabel("Niche")
plt.ylabel(f'Probability')
plt.legend([], [], frameon=False)
plt.show()
rcParams['figure.figsize'] = (4, 3)

In [None]:
# Mean distance to the nearest reference neighbours per predicted niche (outliers hidden).
rcParams['figure.figsize'] = (10, 4)
sns.boxplot(
    data=adata_query.obs,
    x='predlabel',
    y='mean_dist',
    showfliers=False,
)
sns.despine(offset=10, trim=True, bottom=True)
plt.xticks(rotation=90)
plt.xlabel("Niche")
plt.ylabel(f'Mean distance')
plt.legend([], [], frameon=False)
plt.show()
rcParams['figure.figsize'] = (4, 3)

In [None]:
# Distance to the k-th reference neighbour per predicted niche (outliers hidden).
rcParams['figure.figsize'] = (10, 4)
sns.boxplot(
    data=adata_query.obs,
    x='predlabel',
    y='k_dist',
    showfliers=False,
)
sns.despine(offset=10, trim=True, bottom=True)
plt.xticks(rotation=90)
plt.xlabel("Niche")
plt.ylabel(f'K distance')
plt.legend([], [], frameon=False)
plt.show()
rcParams['figure.figsize'] = (4, 3)

# Reference 43 + donor 13 

## Prepare for analysis

In [None]:
# Load the reference + query model output with suffix '43_8' and derive the
# figure folder from the model path.
adata, adata_path = load_adata(
    suffix='43_8',
    model='reference_query_mapping',
)
figure_path = adata_path.replace('models', 'figures')
figure_path = figure_path.replace('/nanostring_cosmx_human_nsclc_reference_query_mapping.h5ad', '')
gc.collect()
print(figure_path)
adata

### Select resolution

In [None]:
# Call the notebook's `plot_latent` helper for the reference-query mapping model.
plot_latent(adata, model='reference_query_mapping')

In [None]:
# Chosen Leiden resolution; passed to the notebook's `res_details` helper.
latent_leiden_resolution=0.7
res_details(adata, resolution=latent_leiden_resolution, model='reference_query_mapping')

In [None]:
# UMAP coloured by mapping entity: reference cells only, then all cells.
sc.set_figure_params(dpi=200, figsize=(4,3))
reference_cells = adata[adata.obs.mapping_entity == 'reference']
for cells in (reference_cells, adata):
    sc.pl.umap(cells, color=['mapping_entity'], ncols=1, wspace=0.3, size=0.5, frameon=False)
sc.set_figure_params(fontsize=12, dpi=100, figsize=(4,3))

### Dendrogram

In [None]:
# Compute a dendrogram of the Leiden clusters on the full NicheCompass latent
# space and reorder the cluster categories to follow the dendrogram order.
latent_leiden_resolution = 0.7
leiden_key = f"latent_leiden_{latent_leiden_resolution}"
n_latent_dims = adata.obsm['nichecompass_latent'].shape[1]
sc.tl.dendrogram(
    adata,
    groupby=leiden_key,
    use_rep='nichecompass_latent',
    n_pcs=n_latent_dims,
)
dendrogram_order = adata.uns[f'dendrogram_latent_leiden_{latent_leiden_resolution}']['categories_ordered']
adata.obs[leiden_key] = adata.obs[leiden_key].cat.reorder_categories(dendrogram_order, ordered=False)

In [None]:
# Draw the cluster dendrogram on a short, wide canvas.
rcParams['figure.figsize'] = (4,1.5)
sc.pl.dendrogram(adata, groupby=f"latent_leiden_{latent_leiden_resolution}", orientation='bottom', show=False)
rcParams['figure.figsize'] = (4, 3)  # back to the notebook-wide default

### Labels and palette

In [None]:
# UMAP with the Leiden cluster ids (resolution 0.7) written on the data.
rcParams['figure.figsize'] = (8, 6)
sc.pl.umap(adata, color=['latent_leiden_0.7'], size=2.5, legend_loc='on data', frameon=False)
rcParams['figure.figsize'] = (4, 3)  # back to the notebook-wide default

In [None]:
# Manual annotation: Leiden cluster id (resolution 0.7) -> numbered niche label.
# Clusters that share a label are merged into the same niche.
leiden2niche = {
    '0': '1- Tumor (stroma border)', '8': '1- Tumor (stroma border)', '1': '2- Tumor interior', '5': '3- Tumor (neutrophil border)', '7': '4- Tumor interior', '19': '4- Tumor interior', 
    '17': '5- Infiltrated tumor', '12': '6- Neutrophil expansion', '6': '7- Myeloid cells rich stroma', '13': '8- Stroma', '14': '8- Stroma', '4': '8- Stroma', '15': '8- Stroma', '11': '9- Plasmablast rich stroma', 
    '10': '10- Plasmablast rich stroma', '2': '11- Lymphoid aggregates', 
    '9': '15- Tumor (macrophage infiltrated)', '18': '13- Infiltrating macrophages', '16': '12- Macrophage rich stroma', '3': '14- Immune rich stroma'
}

adata.obs['niche'] = adata.obs['latent_leiden_0.7'].map(leiden2niche)
# 'leidenOrd' keeps only the text before the first '-' of the niche label, i.e. its number.
adata.obs['leidenOrd'] = adata.obs['niche'].apply(lambda x: x.split('-')[0])

In [None]:
# Registry of named cluster groups, keyed by the obs column the cluster ids refer to.
# Create it only when it is missing so that groups registered earlier are kept
# (the previous check looked for the misspelled key 'cluster_grups' and therefore
# reset the registry on every run).
if 'cluster_groups' not in adata.uns:
    adata.uns['cluster_groups'] = {}

# Groups expressed as raw Leiden cluster ids (resolution 0.7).
adata.uns['cluster_groups']["latent_leiden_0.7"] = {
    'tumor_clusters': ['9','0','1','5','7','8','17','19'],
    'macrophage_clusters': ['18','6','15','16'],
}

# Groups expressed as niche numbers (the 'leidenOrd' column).
adata.uns['cluster_groups']["leidenOrd"] = {
    'tumor_clusters': ['1', '2', '3', '4', '5','15'],
    'stroma_clusters': ['6', '7', '8', '9', '10', '11', '12','13','14'],
    'neutrophil_clusters': ['6', '7'],
    'macrophage_clusters': ['12','13'],
    'lymphoid_clusters': ['9', '10', '11','14']
}

In [None]:
# UMAP coloured by niche label and by niche number, labels drawn on the data.
rcParams['figure.figsize'] = (8, 6)
sc.pl.umap(adata, color=['niche','leidenOrd'], size=2.5, legend_loc='on data', frameon=False)
rcParams['figure.figsize'] = (4, 3)  # back to the notebook-wide default


In [None]:
# Preview of the 'Dark2' colormap as a thin horizontal strip.
rcParams['figure.figsize'] = (4, 0.5)
general=np.apply_along_axis(to_hex, 1, np.array(plt.get_cmap('Dark2').colors))  # Dark2 colours as hex strings
a=np.outer(np.ones(len(general)),np.arange(0,1,0.01))   # pseudo image data
plt.imshow(a,aspect='auto',cmap=plt.get_cmap('Dark2'),origin="lower")
rcParams['figure.figsize'] = (4, 3)  # back to the notebook-wide default

In [None]:
# Niche palette built from the 'Dark2' base colours (as hex strings).
# `colorFader` is defined elsewhere in this notebook; presumably it returns `n` shades
# fading from the first colour towards `c2` — TODO confirm.
general=np.apply_along_axis(to_hex, 1, np.array(plt.get_cmap('Dark2').colors))
tumor=colorFader(general[2], c2='#FFFFFF', n=6, mix=0)
lymphoid=general[4]
myeloid=colorFader(general[1], c2='#FFFFFF', n=2, mix=0)
Blike=colorFader(general[0], c2='#FFFFFF', n=2, mix=0)
neutrophil=colorFader(general[3], c2='#FFFFFF', n=2, mix=0)
stroma=colorFader(general[5], c2='#FFFFFF', n=2, mix=0)


# Colour per niche number.
leiden_colDict = {
    '1': tumor[0], '2': tumor[1], '3': tumor[2], '4': tumor[3], '5': tumor[4],
    '7': neutrophil[0], '6': neutrophil[1], 
    '8': stroma[0], '10': Blike[0], '9': Blike[1], '11': lymphoid, '12': myeloid[0],
    '13': myeloid[1], '14': stroma[1], '15': tumor[5]
}
# NOTE(review): the colours below are stored in the dict's insertion order
# (1,2,3,4,5,7,6,8,10,9,11,...), not looked up by category, so each colour lands on the
# category at the same position rather than on its dict key — confirm this is intended.
# adata.uns['latent_leiden_0.7_colors'] = [x for x in leiden_colDict.values()]
adata.uns['leidenOrd_colors'] = [x for x in leiden_colDict.values()]
adata.uns['niche_colors'] = [x for x in leiden_colDict.values()]

## Overview plots

In [None]:
# Same UMAP as before, now drawn with the niche palette stored in adata.uns.
rcParams['figure.figsize'] = (8, 6)
sc.pl.umap(adata, color=['niche','leidenOrd'], size=2.5, legend_loc='on data', frameon=False)
rcParams['figure.figsize'] = (4, 3)  # back to the notebook-wide default

In [None]:
# Niche UMAP with the default legend placement instead of on-data labels.
rcParams['figure.figsize'] = (8, 6)
sc.pl.umap(adata, color=['niche'], size=2.5, frameon=False)
rcParams['figure.figsize'] = (4, 3)  # back to the notebook-wide default

In [None]:
# Spatial maps, one column per batch: cell types on the top row, niche numbers below.
n = adata.obs.batch.nunique()
fig, axes = plt.subplots(nrows=2, ncols=n, figsize=(8*n,6*2))

row_colors = ('cell type', 'leidenOrd')  # obs column shown on row 0 and row 1
for i, batch in enumerate(adata.obs.batch.unique()):
    for row, obs_key in enumerate(row_colors):
        ax = sc.pl.embedding(
            adata[adata.obs.batch==batch],
            basis="spatial",
            color=[obs_key],
            size=6,
            legend_loc=None,
            frameon=False,
            title=[''],
            ax=axes[row,i],
            show=False,
        )

# Pack the panels tightly.
fig.tight_layout()
fig.subplots_adjust(wspace=0.01, hspace=0.01)
# plt.savefig(f"{figure_path}/histology_h.pdf", bbox_inches="tight", format='pdf')

In [None]:
# Cell type composition of each niche, in percent.
cluster_var = 'cell type'
plot_var = 'leidenOrd'
# Count cells per (cell type, niche) pair ...
props = adata.obs.groupby([cluster_var, plot_var]).size().reset_index()
# ... and reshape so that rows are niches and columns are cell types.
props = props.pivot(columns=plot_var, index=cluster_var).T
props.index = props.index.droplevel(0)  # drop the outer level that holds the count column's label
props.fillna(0, inplace=True)
props = props.div(props.sum(axis=1), axis=0)*100   # each row now sums to 100
fig = plot_cluster_proportions(props, xlabel_rotation=90, cluster_palette=adata.uns[f'{cluster_var}_colors'], figsize=(8,3))
# plt.savefig(f"{figure_path}/a3_prop.pdf", bbox_inches="tight", format='pdf')

In [None]:
# Batch composition of each niche, in percent.
cluster_var = 'batch'
plot_var = 'leidenOrd'
# Count cells per (batch, niche) pair ...
props = adata.obs.groupby([cluster_var, plot_var]).size().reset_index()
# ... and reshape so that rows are niches and columns are batches.
props = props.pivot(columns=plot_var, index=cluster_var).T
props.index = props.index.droplevel(0)  # drop the outer level that holds the count column's label
props.fillna(0, inplace=True)
props = props.div(props.sum(axis=1), axis=0)*100   # each row now sums to 100
fig = plot_cluster_proportions(props, xlabel_rotation=90, cluster_palette=adata.uns[f'{cluster_var}_colors'], figsize=(8,3))
# plt.savefig(f"{figure_path}/a3_prop.pdf", bbox_inches="tight", format='pdf')


## Neighborhood composition

In [None]:
from sklearn.neighbors import NearestNeighbors

leiden_col_key = 'leidenOrd'
n = 25
# For every cell, count the cell types found among its n nearest spatial neighbours.
# Neighbours are searched within the cell's own batch only.
knn = {}          # batch -> (cells x n) array with the cell type of each neighbour
cell_counts = {}  # cell name -> {cell type: number of neighbours of that type}
for b in adata.obs.batch.unique():
    batch_adata = adata[adata.obs.batch == b]  # subset once instead of three times
    X = batch_adata.obsm['spatial']
    # A plain `str` dtype sizes itself to the longest label, so long cell type names are not
    # truncated (the previous fixed '<U22' cut labels to 22 characters).
    celltypes = batch_adata.obs['cell type'].astype(str).to_numpy(dtype=str)
    cellnames = batch_adata.obs_names

    # The fitted points are also the query points.
    neighbour_idx = NearestNeighbors(n_neighbors=n).fit(X).kneighbors(X, return_distance=False)
    knn[b] = celltypes[neighbour_idx]

    for cellname, neighbour_types in zip(cellnames, knn[b]):
        unique, counts = np.unique(neighbour_types, return_counts=True)
        cell_counts[cellname] = dict(zip(unique, counts))

# Rows are collected batch by batch; reindex so they line up with adata.obs_names.
# Cell types never seen in a neighbourhood count as 0.
adata.obsm[f'k{n}_neighbours_celltype'] = pd.DataFrame(cell_counts).T.reindex(adata.obs_names).fillna(0)

In [None]:
# Neighbourhood composition per niche: for each group of niches, box plots of how many of a
# cell's n nearest neighbours belong to each cell type.
leiden_col_key = 'leidenOrd'
n = 25
count_col = f'n NN/{n}'
for structure in ['tumor_clusters', 'stroma_clusters', 'macrophage_clusters']:
    groups = adata.uns['cluster_groups'][leiden_col_key][structure]
    # Format for plotting. Work on a copy so the niche column is not written into the
    # table stored in adata.obsm.
    dt = adata.obsm[f'k{n}_neighbours_celltype'].copy()
    dt[leiden_col_key] = adata.obs[leiden_col_key]
    dt = dt.loc[dt[leiden_col_key].isin(groups), :]
    dt[leiden_col_key] = dt[leiden_col_key].cat.remove_unused_categories()
    dt = dt.reset_index()
    dt = pd.melt(dt, id_vars=['index', leiden_col_key], value_name=count_col, var_name='cell type')
    dt['cell type'] = pd.Categorical(dt['cell type'], categories=adata.obs['cell type'].cat.categories)

    # Keep only cell types with an interesting number: their highest per-niche mean count must
    # lie between a lower share of n (5% for tumor niches, 15% otherwise) and 60% of n.
    max_mean = dt.groupby(['cell type', leiden_col_key])[count_col].mean().groupby(['cell type']).max()
    if structure == 'tumor_clusters':
        min_share, width_per_niche = 0.05, 1.5
    else:
        min_share, width_per_niche = 0.15, 2
    include = (max_mean > n*min_share) & (max_mean < n*0.6)
    rcParams['figure.figsize'] = (len(groups)*width_per_niche, 4)
    dt = dt[dt['cell type'].isin(include[include].index.values)]
    dt['cell type'] = dt['cell type'].cat.remove_unused_categories()

    # plot; `include` is indexed by the cell type categories, so its mask selects the matching colours
    palette = np.asarray(adata.uns['cell type_colors'])[include.values]
    sns.boxplot(data=dt, x=leiden_col_key, y=count_col, hue='cell type', palette=palette, showfliers=False).set(title='')
    plt.ylim((0, n))  # a cell has at most n neighbours of one type
    plt.xticks(rotation=0)
    plt.xlabel("", fontsize=17.5)
    plt.ylabel('neighborhood composition', fontsize=17.5)
    # plt.legend([],[], frameon=False)
    plt.legend(frameon=False, loc='center left', bbox_to_anchor=(1, 0.7), fontsize=15)
    plt.tick_params(bottom=False, labelsize=17.5)
    sns.despine(offset=10, trim=True, bottom=True)
    plt.show()
rcParams['figure.figsize'] = (4, 3)  # back to the notebook-wide default

## Compare GEX

In [None]:
# Display the registered cluster groups.
adata.uns['cluster_groups']

In [None]:
rcParams['figure.figsize'] = (8, 3)
# Rank genes for the tumor niches; only the listed niche numbers are tested.
sc.tl.rank_genes_groups(adata, 'leidenOrd', groups=['2','1','3','5','4','15'])
# Results for niche '15' as a table: gene, adjusted p-value, log fold change.
x = pd.DataFrame({'gene': adata.uns['rank_genes_groups']['names']['15'], 'padj': adata.uns['rank_genes_groups']['pvals_adj']['15'], 'logFC': adata.uns['rank_genes_groups']['logfoldchanges']['15']})
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)
rcParams['figure.figsize'] = (4, 3)  # back to the notebook-wide default


In [None]:
# Niche '15' statistics for a hand-picked list of genes.
x[x.gene.isin(['SPP1','EGFR','CD44','ITGAV','ITGB1','VEGFA','NFKB1','HIF1A','JUN','AKT1'])]

In [None]:
# Genes with positive log fold change and adjusted p-value below 0.05 in niche '15'.
x[(x.logFC > 0) & (x.padj < 0.05)].gene.values

In [None]:
# UMAP coloured by the expression of selected genes.
sc.pl.umap(adata, color=['HLA-DRA','CCL5','SPP1','TGFB2'], ncols=4, wspace=0.3, size=0.5)

In [None]:
# UMAP coloured by CD52 expression.
sc.pl.umap(adata, color=['CD52'], ncols=4, wspace=0.3, size=0.5)

In [None]:
# For each registered group of Leiden clusters, rank genes among the group's clusters and plot the top 25.
for structure, clusters in adata.uns['cluster_groups'][f"latent_leiden_{latent_leiden_resolution}"].items():
    print(structure)
    sc.tl.rank_genes_groups(adata, f"latent_leiden_{latent_leiden_resolution}", groups=clusters)
    sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)

In [None]:
# Pairwise comparison of Leiden clusters 18 and 16: first 18 with 16 as reference, then the reverse.
sc.tl.rank_genes_groups(adata, f"latent_leiden_{latent_leiden_resolution}", groups=['18'], reference='16')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)
sc.tl.rank_genes_groups(adata, f"latent_leiden_{latent_leiden_resolution}", groups=['16'], reference='18')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)

In [None]:
# Spatial expression of selected genes in batch 'lung13'.
sc.pl.embedding(adata[adata.obs.batch=='lung13'], basis="spatial", color=['MARCO','FABP4','SPP1','CCL2'], ncols=4, wspace=0.3, size=2)

In [None]:
# Spatial expression of the same genes in batch 'lung6'.
sc.pl.embedding(adata[adata.obs.batch=='lung6'], basis="spatial", color=['MARCO','FABP4','SPP1','CCL2'], ncols=4, wspace=0.3, size=2)

In [None]:
# Spatial expression of PDCD1 and CD274.
# NOTE(review): the filter compares `batch` with the empty string, while the batch names used
# elsewhere in this notebook look like 'lung6' / 'lung13'; this probably selects no cells —
# fill in the intended batch name.
sc.pl.embedding(adata[adata.obs.batch==''], basis="spatial", color=['PDCD1','CD274'], ncols=4, wspace=0.3, size=2)

## Mapping

In [None]:
# Load the trained reference+query model to read which gene programs (GPs) it considers active.
# NOTE(review): `NicheCompass` is not imported in the notebook's visible import cell — confirm
# it is imported before this cell runs.
model =  NicheCompass.load(dir_path=f'{base_path}/artifacts/nanostring_cosmx_human_nsclc/models/reference_query_mapping/19102023_172844_43_8/',
                  adata=None,
                  adata_file_name='nanostring_cosmx_human_nsclc_reference_query_mapping.h5ad',
                  gp_names_key='nichecompass_gp_names')
query_active_gps = model.get_active_gps()
# NOTE(review): the next cell loads the same reference again; one of the two loads looks redundant.
adata_ref, adata_path = load_adata(suffix='43', model='reference')

gc.collect()

In [None]:
# Annotate the reference (model suffix '43') with niche labels and colours.
adata_ref, adata_path = load_adata(suffix='43', model='reference')
# Reference cluster id (resolution 0.45) -> niche label. Kept under its own name so the
# query's `leiden2niche` (resolution 0.7), which later cells still use, is not overwritten.
ref_leiden2niche = {
    '0': '1- Tumor (stroma border)', '2': '2- Tumor interior', '5': '3- Tumor (neutrophil border)', '7': '4- Tumor interior', '11': '5- Infiltrated tumor',
    '3': '6- Neutrophil expansion', '4': '7- Myeloid cells rich stroma', '1': '8- Stroma', '6': '9- Plasmablast rich stroma', '9': '10- Plasmablast rich stroma', '8': '11- Lymphoid aggregates', '10': '12- Macrophage rich stroma'
}

adata_ref.obs['niche'] = adata_ref.obs['latent_leiden_0.45'].map(ref_leiden2niche)
# Coarser annotations based on the resolution-0.1 clustering.
adata_ref.obs['niche_0.1'] = adata_ref.obs['latent_leiden_0.1'].map({'0':'lymphoid stroma', '1':'tumor 9,12', '2': 'myeloid stroma', '3': 'tumor 6', '4': 'tumor 5'})
adata_ref.obs['niche_0.1lo'] = adata_ref.obs['latent_leiden_0.1'].map({'0':'lymphoid stroma', '1':'tumor', '2': 'myeloid stroma', '3': 'tumor', '4': 'tumor'})

# Palette built from the 'Dark2' base colours. `colorFader` is defined elsewhere in this
# notebook; presumably it returns `n` shades of the given colour — TODO confirm.
general=np.apply_along_axis(to_hex, 1, np.array(plt.get_cmap('Dark2').colors))
tumor=colorFader(general[2], c2='#FFFFFF', n=5, mix=0)
lymphoid=general[4]
myeloid=general[1]
Blike=colorFader(general[0], c2='#FFFFFF', n=2, mix=0)
neutrophil=colorFader(general[3], c2='#FFFFFF', n=2, mix=0)
stroma=general[5]


# Colour per reference cluster id.
leiden_colDict = {
    '0': tumor[0], '2': tumor[1], '5': tumor[2], '7': tumor[3], '11': tumor[4], 
    '3': neutrophil[0], '4': neutrophil[1], 
    '1': stroma, '6': Blike[0], '9': Blike[1], '8': lymphoid, '10': myeloid 
}
# Look colours up by category instead of relying on the dict's insertion order matching the
# stored category order.
adata_ref.uns['latent_leiden_0.45_colors'] = [leiden_colDict[cl] for cl in adata_ref.obs['latent_leiden_0.45'].cat.categories]
adata_ref.uns['leidenOrd_colors'] = [x for x in leiden_colDict.values()]
niche2color = {ref_leiden2niche[cl]: color for cl, color in leiden_colDict.items()}
adata_ref.uns['niche_colors'] = [niche2color[niche] for niche in adata_ref.obs.niche.cat.categories]

# Niche label -> colour, in niche category order; reused to colour the transferred labels.
colDict = {cl: color for color, cl in zip(adata_ref.uns['niche_colors'], adata_ref.obs.niche.cat.categories)}

adata_ref

In [None]:
ref_active_gps = adata_ref.uns['nichecompass_active_gp_names']
ref_obs = adata_ref.obs
# Gene programs that are active in both the reference and the query model.
# NOTE(review): built from a set, so the order of `gps` is arbitrary; it is only used for
# membership tests here.
gps = list(set(ref_active_gps).intersection(set(query_active_gps)))
# Keep only the latent columns of the shared GPs; the kept columns stay in `ref_active_gps` order
# (assumes the latent columns are ordered like `ref_active_gps` — TODO confirm).
adata_ref.obsm['nichecompass_latent'] = adata_ref.obsm['nichecompass_latent'][:,pd.Series(ref_active_gps).isin(gps)]
gc.collect()

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Distance-weighted 50-nearest-neighbour classifier fitted on the reference latent space.
# Three label columns are passed as targets, so the classifier is fitted for all three at once.
knn = KNeighborsClassifier(n_neighbors=50, weights='distance')
knn.fit(X=adata_ref.obsm['nichecompass_latent'], y=adata_ref.obs[['niche','niche_0.1','niche_0.1lo']])

In [None]:
# The reference object is no longer needed once the classifier is fitted; release its memory.
del adata_ref
gc.collect()

In [None]:
# Load the mapped data set and keep the query cells, restricted to the shared gene programs.
adata_query, adata_path = load_adata(suffix='43_8', model='reference_query_mapping')
adata_query.obs['mapping_entity'].value_counts()
# Copy so the obsm assignment below writes to a real object rather than to a view.
adata_query = adata_query[adata_query.obs.mapping_entity == 'query'].copy()
# The classifier was fitted on the reference latent, whose shared-GP columns are in
# `ref_active_gps` order. Select the query columns by GP name in that same order, so each
# latent dimension is compared with its counterpart even if the two models list their active
# GPs in a different order.
shared_gps = set(gps)
ref_gp_order = [gp for gp in ref_active_gps if gp in shared_gps]
query_gp_position = {gp: i for i, gp in enumerate(query_active_gps)}
adata_query.obsm['nichecompass_latent'] = adata_query.obsm['nichecompass_latent'][:, [query_gp_position[gp] for gp in ref_gp_order]]
gc.collect()

In [None]:
# Class probabilities for every query cell (one entry per fitted label column, see `proba[0]` below),
# plus the distances and indices of each query cell's 50 nearest reference cells.
proba = knn.predict_proba(adata_query.obsm['nichecompass_latent'])
k_dist, k_indx = knn.kneighbors(adata_query.obsm['nichecompass_latent'], n_neighbors=50, return_distance=True)

In [None]:
# Summarise the label transfer for the first fitted label column ('niche').
niche_proba = proba[0]
predictions = pd.DataFrame(
    {
        'predlabel': np.argmax(niche_proba, axis=1),    # index of the most probable class
        'probability': np.max(niche_proba, axis=1),     # probability of that class
        'mean_dist': np.mean(k_dist, axis=1),           # mean distance to the 50 neighbours
        'k_dist': k_dist[:,49],                         # distance stored in the 50th neighbour column
    }
)
# Translate class indices into the niche labels the classifier was fitted with.
index_to_label = dict(enumerate(knn.classes_[0]))
predictions['predlabel'] = predictions['predlabel'].map(index_to_label)
predictions.index = adata_query.obs.index
predictions.to_csv(f'{base_path}/artifacts/nanostring_cosmx_human_nsclc/results/reference_query_mapping/19102023_172844_43_8/label_transfer.csv')

predictions

In [None]:
# Reload the saved label-transfer table and plot the distribution of the top-class probability.
predictions = pd.read_csv(f'{base_path}/artifacts/nanostring_cosmx_human_nsclc/results/reference_query_mapping/19102023_172844_43_8/label_transfer.csv', index_col=0)
predictions.probability.hist()
plt.show()

In [None]:
# Fraction of query cells whose top probability is below 0.7 (True) or not (False).
(predictions.probability < 0.7).value_counts()/predictions.probability.shape[0]

In [None]:
# Same fractions for a threshold of 0.5.
(predictions.probability < 0.5).value_counts()/predictions.probability.shape[0]

In [None]:
# For each predicted niche: is it assigned to more than 5% of the query cells?
predictions['predlabel'].value_counts()/predictions.shape[0] > 0.05

In [None]:
# Simplify labels by keeping only niches that are assigned to more than 5% of the cells.
keep = predictions['predlabel'].value_counts().index[predictions['predlabel'].value_counts()/predictions.shape[0] > 0.05]
keep

In [None]:
# Append the prediction columns to the query metadata (column-wise concatenation on the shared index).
adata_query.obs = pd.concat([adata_query.obs, predictions], axis=1)

In [None]:
# Display the transferred labels.
adata_query.obs.predlabel

In [None]:
# Give the predicted labels the category order of `colDict` (the reference niches) and reuse its colours.
adata_query.obs.predlabel = pd.Categorical(adata_query.obs.predlabel, categories=colDict.keys())
adata_query.uns['predlabel_colors'] = [colDict[cl] for cl in adata_query.obs.predlabel.cat.categories]

In [None]:
# Query UMAP coloured by the two distance summaries, the top probability and the predicted niche.
sc.pl.umap(adata_query, color=['mean_dist','k_dist','probability','predlabel'], size=1.5, frameon=False, ncols=2, wspace=1, color_map='viridis')

In [None]:
# Keep only query cells whose predicted niche is one of the frequent niches in `keep`.
adata_query = adata_query[adata_query.obs.predlabel.isin(keep)]

In [None]:
# 2x2 overview of the filtered query cells with fixed colour ranges:
# distances are clipped to [50, 170], probability to [0, 1].
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12,6))
ax = sc.pl.umap(adata_query, color=['mean_dist'], size=1.5, frameon=False, ncols=1, wspace=3, vmax=170, vmin=50, ax=axes[0,0], show=False, color_map='viridis')
ax = sc.pl.umap(adata_query, color=['k_dist'], size=1.5, frameon=False, ncols=1, wspace=3, vmax=170, vmin=50, ax=axes[0,1], show=False, color_map='viridis')
ax = sc.pl.umap(adata_query, color=['probability'], size=1.5, frameon=False, ncols=1, wspace=3, vmax=1, vmin=0, ax=axes[1,0], show=False, color_map='viridis')
ax = sc.pl.umap(adata_query, color=['predlabel'], size=1.5, frameon=False, ncols=1, wspace=3, ax=axes[1,1], show=False)

fig.tight_layout()

In [None]:
# High-resolution, untitled UMAPs of the query: cell type, predicted niche and top probability.
sc.set_figure_params(dpi=200, figsize=(6,4))
sc.pl.umap(adata_query, color=['cell type'], size=2, frameon=False, ncols=1, wspace=3, title=[''])
sc.pl.umap(adata_query, color=['predlabel'], size=1.5, frameon=False, ncols=1, wspace=3, title=[''])
sc.pl.umap(adata_query, color=['probability'], size=1.5, frameon=False, ncols=1, wspace=3, vmax=1, vmin=0, colorbar_loc='bottom', title=[''], color_map='viridis')
sc.set_figure_params(dpi=100, figsize=(4,3))  # lower dpi and smaller size again for the following cells

In [None]:
# Cell type composition of each predicted niche, as raw cell counts.
cluster_var = 'cell type'
plot_var = 'predlabel'
# Count cells per (cell type, predicted niche) pair ...
props = adata_query.obs.groupby([cluster_var, plot_var]).size().reset_index()
# ... and reshape so that rows are predicted niches and columns are cell types.
props = props.pivot(columns=plot_var, index=cluster_var).T
props.index = props.index.droplevel(0)  # drop the outer level that holds the count column's label
props.fillna(0, inplace=True)
# Row normalisation to percent is switched off here, so counts are plotted.
# props = props.div(props.sum(axis=1), axis=0)*100 
fig = plot_cluster_proportions(props, xlabel_rotation=90, cluster_palette=adata_query.uns[f'{cluster_var}_colors'], figsize=(8,3))

In [None]:
# NOTE(review): the visible cells only set 'predlabel_colors' on `adata_query`; confirm that
# 'niche_colors' is present in the loaded object.
adata_query.uns['niche_colors']

In [None]:
# Box plot of the top-class probability per predicted niche, in the niche colours; outliers hidden.
rcParams['figure.figsize'] = (10, 4)  # wider canvas for this figure only
sns.boxplot(data=adata_query.obs, x='predlabel', y='probability', showfliers = False, palette=adata_query.uns['predlabel_colors'])
sns.despine(offset=10, trim=True, bottom=True)
plt.xticks(rotation=90)
plt.xlabel("Niche")
plt.ylabel(f'Probability')
plt.legend([],[], frameon=False)  # empty handles/labels: no legend entries are drawn
plt.show()
rcParams['figure.figsize'] = (4, 3)  # back to the notebook-wide default

In [None]:
# Box plot of the mean neighbour distance per predicted niche, in the niche colours; outliers hidden.
rcParams['figure.figsize'] = (10, 4)  # wider canvas for this figure only
sns.boxplot(data=adata_query.obs, x='predlabel', y='mean_dist', showfliers = False, palette=adata_query.uns['predlabel_colors'])
sns.despine(offset=10, trim=True, bottom=True)
plt.xticks(rotation=90)
plt.xlabel("Niche")
plt.ylabel(f'Mean distance')
plt.legend([],[], frameon=False)  # empty handles/labels: no legend entries are drawn
plt.show()
rcParams['figure.figsize'] = (4, 3)  # back to the notebook-wide default

In [None]:
# Box plot of `k_dist` per predicted niche, in the niche colours; outliers hidden.
rcParams['figure.figsize'] = (10, 4)  # wider canvas for this figure only
sns.boxplot(data=adata_query.obs, x='predlabel', y='k_dist', showfliers = False, palette=adata_query.uns['predlabel_colors'])
sns.despine(offset=10, trim=True, bottom=True)
plt.xticks(rotation=90)
plt.xlabel("Niche")
plt.ylabel(f'Distance to k')
plt.legend([],[], frameon=False)  # empty handles/labels: no legend entries are drawn
plt.show()
rcParams['figure.figsize'] = (4, 3)  # back to the notebook-wide default

## GPs

### Tumor niche

In [None]:
# Tumor niche: gene programs (GPs) listed in the saved test table, excluding 'Add-on' programs.
significant_gps = pd.read_csv(f"{base_path}/artifacts/nanostring_cosmx_human_nsclc/results/reference_query_mapping/19102023_172844_43_8/gpTest_tumor_clusters_9_r0.7.csv")
is_addon = significant_gps.gene_program.str.contains('Add-on')
significant_gps = significant_gps[~is_addon]
interest_gps = significant_gps.gene_program.values
invert = []

# Latent scores of the GPs of interest: one column per GP, indexed like `adata.obs`.
active_gp_names = adata.uns['nichecompass_active_gp_names']
is_interest = pd.Series(active_gp_names).isin(interest_gps).values
interest_gp_df = pd.DataFrame(
    adata.obsm['nichecompass_latent'][:, is_interest],
    columns=active_gp_names[is_interest],
).set_index(adata.obs.index)

# Flip the sign of the GPs listed in `invert`.
for gp in interest_gps:
    if gp in invert:
        interest_gp_df[gp] = -interest_gp_df[gp]

# Remove columns of the same name already present in `adata.obs`, then append the new scores.
for gp in interest_gps:
    if gp in adata.obs:
        adata.obs.drop(columns=gp, inplace=True)
adata.obs = pd.concat([adata.obs, interest_gp_df], axis=1)

In [None]:
# output not shown to reduce notebook size
# For every GP of interest: UMAP of the GP score, then UMAPs of its source and target genes.
for gp in interest_gps:
    gp_idx = adata.uns['nichecompass_gp_names'].tolist().index(gp)  # column of this GP in the category matrices
    # genes whose sources-category entry for this GP is 1 or 2
    source = adata.var_names[(adata.varm['nichecompass_gp_sources_categories'][:,gp_idx] == 1) |  (adata.varm['nichecompass_gp_sources_categories'][:,gp_idx] == 2)].tolist()
    # genes whose targets-category entry for this GP is 3 or 2
    targets = adata.var_names[(adata.varm['nichecompass_gp_targets_categories'][:,gp_idx] == 3) | (adata.varm['nichecompass_gp_targets_categories'][:,gp_idx] == 2)].tolist()
    sc.pl.umap(adata, color=gp, ncols=5, size=0.5, frameon=False, cmap='RdGy_r')
    if len(source+targets) > 0:
        sc.pl.umap(adata, color=source+targets, ncols=5, size=0.5, frameon=False, cmap='RdPu')

### Macrophage niche

In [None]:
# Macrophage niche: gene programs (GPs) listed in the saved test table, excluding 'Add-on' programs.
significant_gps = pd.read_csv(f"{base_path}/artifacts/nanostring_cosmx_human_nsclc/results/reference_query_mapping/19102023_172844_43_8/gpTest_macrophage_clusters_18_r0.7.csv")
is_addon = significant_gps.gene_program.str.contains('Add-on')
significant_gps = significant_gps[~is_addon]
interest_gps = significant_gps.gene_program.values
invert = []

# Latent scores of the GPs of interest: one column per GP, indexed like `adata.obs`.
active_gp_names = adata.uns['nichecompass_active_gp_names']
is_interest = pd.Series(active_gp_names).isin(interest_gps).values
interest_gp_df = pd.DataFrame(
    adata.obsm['nichecompass_latent'][:, is_interest],
    columns=active_gp_names[is_interest],
).set_index(adata.obs.index)

# Flip the sign of the GPs listed in `invert`.
for gp in interest_gps:
    if gp in invert:
        interest_gp_df[gp] = -interest_gp_df[gp]

# Remove columns of the same name already present in `adata.obs`, then append the new scores.
for gp in interest_gps:
    if gp in adata.obs:
        adata.obs.drop(columns=gp, inplace=True)
adata.obs = pd.concat([adata.obs, interest_gp_df], axis=1)

In [None]:
# output not shown to reduce notebook size
# For every GP of interest: UMAP of the GP score, then UMAPs of its source and target genes.
for gp in interest_gps:
    gp_idx = adata.uns['nichecompass_gp_names'].tolist().index(gp)  # column of this GP in the category matrices
    # genes whose sources-category entry for this GP is 1 or 2
    source = adata.var_names[(adata.varm['nichecompass_gp_sources_categories'][:,gp_idx] == 1) |  (adata.varm['nichecompass_gp_sources_categories'][:,gp_idx] == 2)].tolist()
    # genes whose targets-category entry for this GP is 3 or 2
    targets = adata.var_names[(adata.varm['nichecompass_gp_targets_categories'][:,gp_idx] == 3) | (adata.varm['nichecompass_gp_targets_categories'][:,gp_idx] == 2)].tolist()
    sc.pl.umap(adata, color=gp, ncols=5, size=0.5, frameon=False, cmap='RdGy_r')
    if len(source+targets) > 0:
        sc.pl.umap(adata, color=source+targets, ncols=5, size=0.5, frameon=False, cmap='RdPu')

### SPP1 pathway

In [None]:
# SPP1 pathway: two hand-picked gene programs; the first one is sign-inverted.
interest_gps = ['SPP1_ligand_receptor_GP', 'Spp1_ligand_receptor_target_gene_GP']
invert = ['SPP1_ligand_receptor_GP']

# Latent scores of the GPs of interest: one column per GP, indexed like `adata.obs`.
active_gp_names = adata.uns['nichecompass_active_gp_names']
is_interest = pd.Series(active_gp_names).isin(interest_gps).values
interest_gp_df = pd.DataFrame(
    adata.obsm['nichecompass_latent'][:, is_interest],
    columns=active_gp_names[is_interest],
).set_index(adata.obs.index)

# Flip the sign of the GPs listed in `invert`.
for gp in interest_gps:
    if gp in invert:
        interest_gp_df[gp] = -interest_gp_df[gp]

# Remove columns of the same name already present in `adata.obs`, then append the new scores.
for gp in interest_gps:
    if gp in adata.obs:
        adata.obs.drop(columns=gp, inplace=True)
adata.obs = pd.concat([adata.obs, interest_gp_df], axis=1)

In [None]:
# For every SPP1 GP: UMAP of the GP score, then UMAPs of its source and target genes.
for gp in interest_gps:
    gp_idx = adata.uns['nichecompass_gp_names'].tolist().index(gp)  # column of this GP in the category matrices
    # genes whose sources-category entry for this GP is 1 or 2
    source = adata.var_names[(adata.varm['nichecompass_gp_sources_categories'][:,gp_idx] == 1) |  (adata.varm['nichecompass_gp_sources_categories'][:,gp_idx] == 2)].tolist()
    # genes whose targets-category entry for this GP is 3, 2 or 1 (category 1 is included here,
    # unlike in the tumor/macrophage cells)
    targets = adata.var_names[(adata.varm['nichecompass_gp_targets_categories'][:,gp_idx] == 3) | (adata.varm['nichecompass_gp_targets_categories'][:,gp_idx] == 2) | (adata.varm['nichecompass_gp_targets_categories'][:,gp_idx] == 1)].tolist()
    sc.pl.umap(adata, color=gp, ncols=5, size=0.5, frameon=False, cmap='RdGy_r')
    if len(source+targets) > 0:
        sc.pl.umap(adata, color=source+targets, ncols=5, size=0.5, frameon=False, cmap='RdPu')

In [None]:
# Larger UMAPs of the SPP1 gene program scores.
rcParams['figure.figsize'] = (8, 6)

# Iterate over the GP names. `significant_gps` is a DataFrame, and iterating a DataFrame yields
# its column labels rather than gene programs.
for gp in interest_gps:
    sc.pl.umap(adata, color=[gp], ncols=2, size=2.5, frameon=False, cmap='RdGy_r')

rcParams['figure.figsize'] = (4, 3)  # back to the notebook-wide default

In [None]:
# Display the registered cluster groups.
adata.uns['cluster_groups']

In [None]:
# Expression of SPP1 and selected partner genes per niche.
# scanpy's violin plot takes the variables as `keys` and the grouping column as `groupby`.
sc.pl.violin(adata, keys=['SPP1','EGFR','CD44','ITGAV','ITGB1'], groupby='niche')


In [None]:
# UMAPs of SPP1 and selected partner genes, in two colour maps.
rcParams['figure.figsize'] = (8, 6)

sc.pl.umap(adata, color=['SPP1','EGFR','CD44','ITGAV','ITGB1'], ncols=2, size=1, frameon=False, cmap='viridis')
sc.pl.umap(adata, color=['SPP1','EGFR','ITGAV'], ncols=3, size=0.5, frameon=False, cmap='RdPu')

    
rcParams['figure.figsize'] = (4, 3)  # back to the notebook-wide default

In [None]:
# Keep only niches with more than 1000 cells.
# NOTE(review): `adata_tmp` is not created in the visible cells of this section — confirm where
# it comes from before running this cell.
keep = adata_tmp.obs['niche'].value_counts().index[adata_tmp.obs['niche'].value_counts() > 1000]
adata_tmp = adata_tmp[adata_tmp.obs['niche'].isin(keep)]

In [None]:
# Marker figures for the subset in `adata_tmp`, saved through scanpy's `save=` argument.
# NOTE(review): `markers` is not defined in the visible cells of this section — confirm.
rcParams['figure.figsize'] = (6, 4)
sc.pl.dotplot(adata_tmp, markers, groupby='batch', dendrogram=False, cmap="Reds", save='spp1_markers.svg')
rcParams['figure.figsize'] = (6, 4)
sc.pl.umap(adata_tmp, color=['cell type', 'batch', 'niche'], ncols=3, size=2, frameon=False, wspace=0.25, save='spp1_conditions.svg')
sc.pl.umap(adata_tmp, color=['MARCO', 'SPP1', 'IFI27', 'CD9', 'FN1', 'TIMP1', 'COL3A1', 'COL1A1', 'MMP12', 'MMP2'], size=2, frameon=False, cmap='viridis', ncols=4, colorbar_loc=None, save='spp1_markers.svg')

## Communication

In [None]:
# Load the trained reference+query model together with its AnnData file (adata=None, so the
# data is read from `adata_file_name` inside the model folder).
model =  NicheCompass.load(dir_path=f'{base_path}/artifacts/nanostring_cosmx_human_nsclc/models/reference_query_mapping/19102023_172844_43_8/',
                  adata=None,
                  adata_file_name='nanostring_cosmx_human_nsclc_reference_query_mapping.h5ad',
                  gp_names_key='nichecompass_gp_names')
model.adata

In [None]:
# Neighbour graph with 90 neighbours computed on the 'spatial' coordinates; results are stored
# under the key 'spatial_cci'.
sc.pp.neighbors(model.adata,
                n_neighbors=90,
                use_rep="spatial",
                key_added="spatial_cci")

In [None]:
from nichecompass.utils import compute_communication_gp_network
# Communication network of the Spp1 GP between Leiden clusters (resolution 0.7), computed
# separately for batch 'lung13' and batch 'lung6'; each result is saved as CSV.
nx_s13 = compute_communication_gp_network(gp_list=['Spp1_ligand_receptor_target_gene_GP'],  model=model, group_key="latent_leiden_0.7", filter_key='batch', filter_cat='lung13')
nx_s13.to_csv(f'{base_path}/artifacts/nanostring_cosmx_human_nsclc/results/reference_query_mapping/19102023_172844_43_8/Spp1_ligand_receptor_target_gene_GP_lung13.csv')
nx_s6 = compute_communication_gp_network(gp_list=['Spp1_ligand_receptor_target_gene_GP'],  model=model, group_key="latent_leiden_0.7", filter_key='batch', filter_cat='lung6')
nx_s6.to_csv(f'{base_path}/artifacts/nanostring_cosmx_human_nsclc/results/reference_query_mapping/19102023_172844_43_8/Spp1_ligand_receptor_target_gene_GP_lung6.csv')

In [None]:
# Reload the saved networks so the cells below can run without recomputing them.
nx_s6 = pd.read_csv(f'{base_path}/artifacts/nanostring_cosmx_human_nsclc/results/reference_query_mapping/19102023_172844_43_8/Spp1_ligand_receptor_target_gene_GP_lung6.csv', index_col=0)
nx_s13 = pd.read_csv(f'{base_path}/artifacts/nanostring_cosmx_human_nsclc/results/reference_query_mapping/19102023_172844_43_8/Spp1_ligand_receptor_target_gene_GP_lung13.csv', index_col=0)

In [None]:
# Re-scale the strengths of both networks jointly to [0, 1] (min-max over the two batches
# together), round to two decimals and drop edges whose rounded strength is 0.
unscaled = "strength_unscaled"
min_value = min(nx_s6[unscaled].min(), nx_s13[unscaled].min())
max_value = max(nx_s6[unscaled].max(), nx_s13[unscaled].max())

nx_s13["strength"] = np.round((nx_s13[unscaled] - min_value) / (max_value - min_value), 2)
nx_s6["strength"] = np.round((nx_s6[unscaled] - min_value) / (max_value - min_value), 2)

nx_s6 = nx_s6[nx_s6["strength"] > 0]
nx_s13 = nx_s13[nx_s13["strength"] > 0]


In [None]:
# Replace the cluster ids of the lung13 network by niche labels.
# NOTE(review): `leiden2niche` is assigned in more than one cell of this notebook; confirm that
# at this point it holds the mapping for the resolution-0.7 clusters the network was computed
# on. Only `nx_s13` is relabelled here; `nx_s6` keeps its cluster ids.
nx_s13.source = nx_s13.source.astype(str).map(leiden2niche)
nx_s13.target = nx_s13.target.astype(str).map(leiden2niche)


In [None]:
from nichecompass.utils import visualize_communication_gp_network
cat = 'latent_leiden_0.7'
# Cluster id -> colour, pairing the categories with the stored colour list by position.
cols = {k: c for k, c in zip(adata.obs[cat].cat.categories,adata.uns[f'{cat}_colors'])}
# NOTE(review): `nx` is first assigned in a later cell (`nx = nx_s13.copy()`); when the notebook
# is run top to bottom it is not defined here — confirm which network should be drawn.
visualize_communication_gp_network(adata, nx, cat_colors=cols, cat_key=cat, edge_type_colors=["#1f77b4"], figsize=(8,8))

In [None]:
# Network of batch 'lung6', drawn on the cells of that batch only.
visualize_communication_gp_network(adata[adata.obs.batch=='lung6'], nx_s6, cat_colors=cols, cat_key=cat, edge_type_colors=["#1f77b4"], figsize=(3,8))

In [None]:
import altair as alt

# Source x target matrix of the lung13 network: one coloured square per edge, with the
# strength printed on top.
nx = nx_s13.copy()
# Axis domain: all source labels followed by all target labels.
niches = nx.source.unique().tolist() + nx.target.unique().tolist()
side = len(niches)*27  # chart width and height in pixels

squares = alt.Chart(nx[nx.strength > 0]).mark_point(
    filled=True,
    size=2000,
    shape='square',
    opacity=0.6,
    strokeWidth=0,
)
base = squares.encode(
    x=alt.X(
        'target:O',
        title=None,
        axis=alt.Axis(orient='bottom', labelFontSize=15, titleFontSize=15),
        scale=alt.Scale(domain=niches),
    ),
    y=alt.Y(
        'source:O',
        title=None,
        axis=alt.Axis(labelFontSize=15, titleFontSize=15),
        scale=alt.Scale(domain=niches),
    ),
    color=alt.Color('strength:Q', scale=alt.Scale(scheme='yellowgreenblue', domain=[0,1])),
).properties(width=side, height=side)

# Same encoding, drawn as black text showing the strength value.
text = base.mark_text().encode(text='strength:Q', color=alt.value("black"))
base + text

In [None]:
# Source x target matrix of the lung6 network: one coloured square per edge, with the
# strength printed on top.
nx = nx_s6.copy()
# Axis domain: all source labels followed by all target labels.
niches = nx.source.unique().tolist() + nx.target.unique().tolist()
side = len(niches)*27  # chart width and height in pixels

squares = alt.Chart(nx[nx.strength > 0]).mark_point(
    filled=True,
    size=2000,
    shape='square',
    opacity=0.6,
    strokeWidth=0,
)
base = squares.encode(
    x=alt.X(
        'target:O',
        title=None,
        axis=alt.Axis(orient='bottom', labelFontSize=15, titleFontSize=15),
        scale=alt.Scale(domain=niches),
    ),
    y=alt.Y(
        'source:O',
        title=None,
        axis=alt.Axis(labelFontSize=15, titleFontSize=15),
        scale=alt.Scale(domain=niches),
    ),
    color=alt.Color('strength:Q', scale=alt.Scale(scheme='yellowgreenblue', domain=[0,1])),
).properties(width=side, height=side)

# Same encoding, drawn as black text showing the strength value.
text = base.mark_text().encode(text='strength:Q', color=alt.value("black"))
base + text