In [None]:
import scanpy as sc
# import scvelo as scv
from glob import glob
import pandas as pd
import numpy as np
import seaborn as sns
import anndata
import scipy
import re
import os
import matplotlib
import math
import random
import itertools
import gseapy as gp
import sklearn
# from icecream import ic
from statannot import add_stat_annotation
from matplotlib import pyplot as plt
from matplotlib import rcParams
from matplotlib.legend import Legend
import matplotlib.gridspec as gridspec

import generalfunctions as gf
import populationfunctions as pf
import airrfunctions as tf
import dgexfunctions as dgexfunc

%matplotlib inline
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 200)
pd.options.display.max_seq_items = 2000

sc.set_figure_params(scanpy=True, dpi=300, dpi_save=300, frameon=True, vector_friendly=True, fontsize=8, 
                         color_map='Dark2', format='pdf', transparent=True, ipython_format='png2x')

rcParams.update({'font.size': 8})
rcParams.update({'font.family': 'Helvetica'})
rcParams['pdf.fonttype'] = 3
rcParams['ps.fonttype'] = 42
rcParams['svg.fonttype'] = 'none'
rcParams['figure.facecolor'] = (1,1,1,1)

import warnings
warnings.filterwarnings("ignore")

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from IPython.core.display import display, HTML
display(HTML("""
<style>
#notebook-container {
    width: 100%
}
 
.code_cell {
   flex-direction: row !important;
}
 
.code_cell .input {
    width: 50%
}
 
.code_cell .output_wrapper {
    width: 50%
}
</style>
"""))

In [None]:
## load celltype data
# Subset the full immune AnnData to one cell-type compartment, attach its
# precomputed UMAP + Leiden labels, merge clinical metadata, then normalise.

# precomputed UMAP embedding file per compartment
embFiles = {
    'tcell_filtered': 'umap_n-0055_md-0.80_s-2.28.npy',
    'bcell_filtered': 'umap_n-0028_md-0.40_s-1.39.npy',
    'myeloid_filtered': 'umap_n-0064_md-0.10_s-1.61.npy',
}

# precomputed Leiden cluster label file per compartment
clustFiles = {
    'tcell_filtered': 'scvi_cugraph_leiden_nbr100_res0.6.npy',
    'bcell_filtered': 'scvi_cugraph_leiden_nbr100_res0.6.npy',
    'myeloid_filtered': 'scvi_cugraph_leiden_nbr100_res1.0.npy',
}

celltype = 'myeloid_filtered'
path = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/scvi_outputs/'
celltype_dir = path+'/'+celltype+'/'

adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/h5adfiles/PembroRT_immune_R100_final.h5ad')

# keep only the barcodes belonging to this compartment, in the saved order
BC = np.load(celltype_dir+'barcodes.npy', allow_pickle=True)
adata = adata[list(BC)].copy()
adata

emb = np.load(celltype_dir+embFiles[celltype])
clust = np.load(celltype_dir+clustFiles[celltype])

adata.obsm['X_umap'] = emb
# cluster labels are stored as strings and exposed as a categorical column
adata.obs['leiden'] = list(map(str, clust))
adata.obs.leiden = adata.obs.leiden.astype('category')

# clinical metadata: bring in only the columns that obs does not already have
metadata = pd.read_csv('/Users/gouink/Documents/RTPD1Manuscript/Human/PEMBRORT_CLINICAL_METADATA_FORSCSEQ_KHG20210624.csv',
                       index_col=None, header=0)
cols = [x for x in metadata.columns if x not in adata.obs.columns]
metadata = metadata[cols]
obs_flat = adata.obs.reset_index()
obs_merged = obs_flat.merge(metadata, left_on='cohort', right_on='Patient_Number', how='left')
adata.obs = obs_merged.set_index('index')
# adata.obs.to_csv('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/metadata_collection/'+celltype+'_obs.csv')

# marker size scales inversely with the number of cells
dotsize = (120000/len(adata))*2

sc.pl.umap(adata, color='leiden', size=dotsize, show=False)
# plt.savefig('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/umap_by_leiden.png',dpi=600,bbox_inches='tight')

# snapshot the current matrix in .raw, then normalise to 10,000 counts per cell and log1p
adata.raw = adata
sc.pp.normalize_per_cell(adata, counts_per_cell_after=10000)
sc.pp.log1p(adata)

# ## perform differential gene expression analysis among clusters

# sc.tl.rank_genes_groups(adata,groupby='leiden',use_raw=False,method='wilcoxon',pts=True,tie_correct=True)
# grps = adata.obs.leiden.unique().tolist()

# for i,g in enumerate(grps):
#     tmp = sc.get.rank_genes_groups_df(adata,group=str(g))
#     tmp['group'] = g
#     if (i==0):
#         dgex = tmp
#     else:
#         dgex = dgex.append(tmp)
#     print(str(g))
    
# dgex.to_csv('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/leiden_dgex.csv')

In [None]:
# UMAP coloured by normalised expression of selected genes (colour scale capped at the 99th percentile)
sc.pl.umap(
    adata,
    color=['CD276', 'CCL23', 'TREM2', 'FABP4', 'FABP5', 'LPL'],
    use_raw=False,
    color_map='inferno',
    vmin=0,
    vmax='p99',
    ncols=4,
    size=dotsize,
)

In [None]:
# Dot plot of the genes of interest across Leiden clusters (normalised values, not .raw)
GOI = [
    'TREM2', 'FABP5', 'FABP4',
    'LPL', 'CD276', 'CCL23',
]
sc.pl.dotplot(
    adata,
    groupby='leiden',
    var_names=GOI,
    use_raw=False,
)

In [None]:
## some key markers
# UMAP panels (4 per row) coloured by each marker; colour scale runs from 0 to the 99th percentile.

GOI = [
    'CD68', 'ITGAM', 'ITGAX', 'CLEC4C',
    'CD1C', 'THBD', 'BATF3', 'LAMP3',
    'C1QA', 'FCGR1A', 'FCGR1B', 'CD74',
    'FCGR3B', 'FCAR', 'IL1B', 'TNF',
]

sc.pl.umap(
    adata,
    color=GOI,
    use_raw=False,
    color_map='inferno',
    vmin=0,
    vmax='p99',
    ncols=4,
    size=dotsize,
)

In [None]:
## fisher test for response heatmap
# Cells whose pCR status is 'unknown' are excluded before the test.

known_pcr = adata[(adata.obs.pCR != 'unknown')]
fig, ax = pf.fisher_response(known_pcr, groupby='leiden', clip=4, thresh=None)

fisher_svg = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/fisher_response.svg'
plt.savefig(fisher_svg)

In [None]:
# Group Leiden clusters into three hand-picked sets and show them on the UMAP.
response = ['00','02','03','07','08','11']
nonresponse = ['01','04','05']
nothing = ['06','09','10']

# Build one cluster -> label lookup. Insertion order mirrors the original
# assignment order, so a cluster listed twice would still take the later label.
cluster_to_label = {}
for label, clusters in (('response', response),
                        ('nonresponse', nonresponse),
                        ('nothing', nothing)):
    for cluster in clusters:
        cluster_to_label[cluster] = label

# Single vectorised lookup instead of one .loc call per cell per group.
# Clusters absent from all three lists are left as NaN.
adata.obs['test'] = adata.obs['leiden'].astype(str).map(cluster_to_label)

sc.pl.umap(adata,color='test')

In [None]:
## cluster dgex heatmap
# Reads the per-cluster differential expression table saved earlier and plots
# the top genes per cluster that pass the p-value / fold-change / percent cutoffs.

analysis_dir = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype
dgex = pd.read_csv(analysis_dir+'/leiden_dgex.csv', index_col=0, header=0)

ax = pf.dgex_plot(
    adata,
    dgex=dgex,
    groupby='leiden',
    topn=50,
    pvalCutoff=0.05,
    fcCutoff=1,
    pctCutoff=0.3,
    use_FDR=True,
    dendro=False,
    plot_type='scanpy_heatmap',
    cmap='Blues',
    figsize=(2,4),
    fontsize=1,
)

plt.savefig(analysis_dir+'/leiden_dgex_heatmap.svg')

In [None]:
## boxplots of cluster percentages
# One value per cohort; x axis = treatment, hue = pCR status.

pct_box_kwargs = dict(
    groupby='leiden', rep='cohort', xcat='treatment', hcat='pCR',
    xorder=['Base','PD1','RTPD1'], horder=['R','NR'],
    show_stats=True, thresh=None,
)
fig, ax = pf.pct_boxplot(adata, **pct_box_kwargs)

pct_box_svg = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/leiden_pct_boxplot.svg'
plt.savefig(pct_box_svg)


In [None]:
## lineplots of cluster percentages
# Same grouping as the box plots; statistics are requested with direction='less'.

pct_line_kwargs = dict(
    groupby='leiden', rep='cohort', xcat='treatment', hcat='pCR',
    xorder=['Base','PD1','RTPD1'], horder=['R','NR'],
    show_stats=True, direction='less', thresh=None,
)
fig, ax = pf.pct_lineplot(adata, **pct_line_kwargs)

pct_line_svg = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/leiden_pct_lineplot.svg'
plt.savefig(pct_line_svg)


In [None]:
## lineplots via pf.med_lineplot
# NOTE(review): presumably plots per-group medians of the cluster percentages — confirm against pf.med_lineplot.

med_line_kwargs = dict(
    groupby='leiden', rep='cohort', xcat='treatment', hcat='pCR',
    xorder=['Base','PD1','RTPD1'], horder=['R','NR'],
    thresh=None,
)
fig, ax = pf.med_lineplot(adata, **med_line_kwargs)

med_line_svg = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/leiden_med_lineplot.svg'
plt.savefig(med_line_svg)


In [None]:
## correlation of T-cell cluster percentages with macrophage cluster percentages
# Predictor: Leiden cluster percentages of the loaded compartment.
# Target: Leiden cluster percentages of 'tcell_filtered'.

query_kwargs = dict(
    adata=adata, metadata=None,
    pred_group='leiden', pred_metric='percent',
    target_cell='tcell_filtered', target_group='leiden', target_metric='percent',
    thresh=10, drop_na=False,
    return_df=True, plot=True, custom_heatmap=True,
    vmin=-1, vmax=1, fontsize=2, figsize=(2,2),
)
df, ax = pf.prediction_query(**query_kwargs)

corr_svg = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/myeloid_tcell_cluster_correlation.svg'
plt.savefig(corr_svg)

In [None]:
## diffusion component embedding
# Drops clusters 07/08/09 and attaches the precomputed diffusion map, which was
# saved for the filtered cell set only.

# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load trusted files.
BC = np.load(path+'/'+celltype+'/barcodes.npy',allow_pickle=True)

# Keep the saved barcode order, but only request barcodes that are still present
# so this cell can be re-run after the cluster filter below has been applied.
present_barcodes = set(adata.obs_names)
adata = adata[[bc for bc in BC if bc in present_barcodes]].copy()

# Explicit copy: the embedding is written into obsm next, and writing into a
# view would force an implicit copy.
excluded_clusters = ['07','08','09']
adata = adata[~adata.obs.leiden.isin(excluded_clusters)].copy()

emb = np.load(path+'/'+celltype+'/diffmap_nbr100_filtered.npy',allow_pickle=True)
if emb.shape[0] != adata.n_obs:
    raise ValueError(
        'diffusion map has %d rows but %d cells remain after filtering' % (emb.shape[0], adata.n_obs)
    )
adata.obsm['X_diffmap'] = emb

print(adata.shape)

# c = ['1,2','1,3','2,3']
# st = adata.obs.leiden.unique().tolist()
# forder = [(x,y) for x in c for y in st]

# sns.set_style("white", rc={"font.family":"Helvetica","axes.grid":False})                                                  
# sns.set_context("paper", rc={"font.size":4,"axes.titlesize":4,"axes.labelsize":4,"font.family":"Helvetica","xtick.labelsize":4,"ytick.labelsize":4})
# fig,axs = plt.subplots(nrows=len(c),ncols=len(st),sharex=True,sharey=True,figsize=(len(st),len(c)))

# for f,ax in zip(forder,np.ravel(axs)):
#     sc.pl.diffmap(adata,color='leiden',components=f[0],groups=f[1],projection='2d',size=dotsize,ax=ax,title=f[1],show=False)

In [None]:
## correlation of genes with diffusion components followed by GSEA of correlations
# 1. keep protein-coding genes (HGNC locus_group) detected in >= 10 cells
# 2. Spearman-correlate every gene with every diffusion component
# 3. run pre-ranked GSEA on the gene correlations of components 1-8


## remove non-coding genes
# Symbols force-labelled as protein-coding below so they survive the filter.
# NOTE(review): presumably HGNC lists these IG/TR constant genes outside the
# protein-coding group — confirm.
keeplist = ['IGHA1','IGHA2','IGHD','IGHE','IGHG1','IGHG2','IGHG3','IGHG4',
            'IGHM','IGKC','IGLC1','IGLC2','IGLC3','TRBC1','TRBC2','TRAC']

hgnc_dir = '/Users/gouink/Documents/GeneLists/Human_gene_filtering_HGNC/'
PC = pd.read_csv(hgnc_dir+'gene_with_protein_product.txt',sep='\t',header=0,index_col=None)
NC = pd.read_csv(hgnc_dir+'non-coding_RNA.txt',sep='\t',header=0,index_col=None)
pseudo = pd.read_csv(hgnc_dir+'pseudogene.txt',sep='\t',header=0,index_col=None)
other = pd.read_csv(hgnc_dir+'other.txt',sep='\t',header=0,index_col=None)

# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0).
combined = pd.concat([PC,NC,pseudo,other],ignore_index=True)
# symbol -> locus_group lookup, built as a fresh frame rather than by editing a slice in place
combined = pd.DataFrame({'locus_group':combined['locus_group'].to_numpy()},
                        index=combined['symbol'].tolist())
combined.loc[keeplist,'locus_group'] = 'protein-coding gene'

# set membership: the original rebuilt combined.index.tolist() for every gene
known_symbols = set(combined.index)
genes = [x for x in adata.var_names if x in known_symbols]
adata = adata[:,genes]
adata.var = adata.var.merge(combined,right_index=True,left_index=True)
# explicit copy so filter_genes below modifies a real object, not a view
adata = adata[:,(adata.var.locus_group=='protein-coding gene')].copy()

sc.pp.filter_genes(adata,min_cells=10,inplace=True)

adata.shape

# Columns are variables (axis=0): the result is a square matrix over
# [genes..., diffusion components...].
# NOTE(review): this densifies X and builds a (genes+components)^2 matrix; memory heavy.
correlation,pval = scipy.stats.spearmanr(adata.X.toarray(),adata.obsm['X_diffmap'],axis=0,nan_policy='raise')

dcs = ['diffmap_'+str(x) for x in np.arange(adata.obsm['X_diffmap'].shape[1])]

idx = adata.var_names.tolist()+dcs
correlation = pd.DataFrame(correlation,index=idx,columns=idx)

correlation.loc[:,dcs].to_csv('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/diffusionmap_correlation.csv')


## GSEA
comps = [1,2,3,4,5,6,7,8]
dcs = ['diffmap_'+str(x) for x in comps]

# empty template keeps the column order (and yields a valid CSV when nothing is significant)
gsea_results = pd.DataFrame(columns=['es','nes','pval','fdr','geneset_size','matched_size','genes','ledge_genes','group'])
genesets = ['/Users/gouink/.cache/gseapy/enrichr.GO_Biological_Process_2018.gmt']

gsea_frames = []
for d in dcs:
    # Rank genes only. The full column also holds the diffmap_* rows (including
    # the component's self-correlation of 1.0), which are not genes and must not
    # enter the ranked list.
    rnk = correlation.loc[adata.var_names,d].sort_values(ascending=False)
    
    for g in genesets:
        
        # NOTE(review): outdir still says 'kegg' although the gene sets are GO Biological Process.
        pre_res = gp.prerank(rnk=rnk, 
                             gene_sets=g,
                             processes=4,
                             permutation_num=100,
                             ascending=False,
                             outdir='test/prerank_report_kegg', 
                             format='png', 
                             seed=6,
                             min_size=0,
                             max_size=500,
                             verbose=True)

        # keep nominally significant terms; copy before adding a column so the
        # write does not target a slice of res2d
        significant = pre_res.res2d
        significant = significant[(significant.pval<=0.05)].copy()
        significant['group'] = d
        gsea_frames.append(significant)

# one concat at the end instead of growing the frame inside the loop
gsea_results = pd.concat([gsea_results]+gsea_frames)

gsea_results.to_csv('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/diffusionmap_correlation_gsearesults.csv')

In [None]:
## inter-component correlations
# Spearman correlation between diffusion components (columns of X_diffmap),
# shown as a clustered heat map.

emb = adata.obsm['X_diffmap']
dcs = ['diffmap_'+str(x) for x in np.arange(emb.shape[1])]

c, p = scipy.stats.spearmanr(emb, axis=0, nan_policy='raise')

small_font_rc = {"font.size":4,"axes.titlesize":4,"axes.labelsize":4,"font.family":"Helvetica","xtick.labelsize":4,"ytick.labelsize":4}
sns.set_style("white", rc={"font.family":"Helvetica","axes.grid":False})
sns.set_context("paper", rc=small_font_rc)
sns.clustermap(
    c,
    xticklabels=dcs,
    yticklabels=dcs,
    row_cluster=True,
    col_cluster=True,
    cmap='RdBu_r',
    vmin=-0.5,
    vmax=0.5,
    dendrogram_ratio=(0.05,0.05),
    figsize=(3,3),
)

In [None]:
# Radar plot of diffusion components 1-8 per Leiden cluster (q=0.9)
comps = list(range(1, 9))
radar_kwargs = dict(
    embedding='X_diffmap', use_obs=False, comps=comps, groupby='leiden',
    q=0.9, filled=True, figsize=None, fontsize=2,
)
fig, axs = gf.diff_radarplot(adata, **radar_kwargs)

radar_svg = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/diffmap_radarplot_byleiden_v2.svg'
plt.savefig(radar_svg)

In [None]:
# Radar plot of diffusion components 1-8 per Leiden cluster, fold-change mode (clip=10, no quantile)
comps = list(range(1, 9))
radar_fc_kwargs = dict(
    embedding='X_diffmap', use_obs=False, comps=comps, groupby='leiden',
    foldchange=True, clip=10,
    q=None, filled=True, figsize=None, fontsize=2,
)
fig, axs = gf.diff_radarplot(adata, **radar_fc_kwargs)

radar_fc_svg = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/diffmap_radarplot_byleiden_foldchange.svg'
plt.savefig(radar_fc_svg)

In [None]:
# Response-split radar plot of diffusion components 1-8, drawn per cluster (byCluster=True, q=0.9)
comps = list(range(1, 9))
radar_resp_kwargs = dict(
    embedding='X_diffmap', use_obs=False, comps=comps, byCluster=True,
    q=0.9, filled=True, figsize=None, fontsize=2,
)
fig, axs = gf.diff_radarplot_response(adata, **radar_resp_kwargs)

radar_resp_svg = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/diffmap_radarplot_bypCR_byleiden.svg'
plt.savefig(radar_resp_svg)

In [None]:
# Shannon box plot grouped by Leiden cluster, with statistics shown and missing values dropped
fig, axs = pf.shannon_boxplot(
    adata,
    groupby='leiden',
    show_stats=True,
    drop_na=True,
)

In [None]:
## fisher test for response associated with diffusion components scores
# Mean diffusion-component score per (cohort, treatment, component), passed to
# the Fisher response test as a long-format table.
comps = list(range(1, 9))
dcs = [f'diffmap_{x}' for x in comps]
emb = pd.DataFrame(adata.obsm['X_diffmap'][:, comps],
                   index=adata.obs.index,
                   columns=dcs)

pt = adata.obs.cohort.unique().tolist()
tx = adata.obs.treatment.unique().tolist()

# every cohort x treatment x component combination; combinations without cells stay NaN
idx = pd.MultiIndex.from_product([pt, tx, dcs], names=['cohort', 'treatment', 'dcs'])
df = pd.DataFrame(index=idx, columns=['score'])

for i in df.index:
    cohort_id, treatment_id, component = i
    in_sample = (adata.obs.cohort == cohort_id) & (adata.obs.treatment == treatment_id)
    df.loc[i, 'score'] = emb.loc[in_sample, component].mean()

df.reset_index(inplace=True)

fig, ax = pf.fisher_response(adata=None, df=df, groupby='dcs', calc_pct=False, metric='score', clip=4)

## Literature Signatures

In [None]:
# Attach the CODEX response group to every cell via a shortened patient key.
metadata = pd.read_csv('/Users/gouink/Documents/RTPD1Manuscript/Submission-Cell-Aug2022/Tables/Supplementary_Table_CODEX_response_groups.csv', index_col= 0, header= 0)

# Key = first three characters of `batch`; batches containing 'h03' also append
# 'T' plus the fifth character.
# NOTE(review): assumes these keys match the index of the response-group table — confirm.
adata.obs['short_pt'] = adata.obs['batch'].apply(lambda x: x[:3] if 'h03' not in x else f"{x[:3]}T{x[4]}")

# Drop any response_group left by a previous run of this cell; otherwise the
# merge would create response_group_x / response_group_y instead.
obs_without_group = adata.obs.drop(columns=['response_group'], errors='ignore')
adata.obs = obs_without_group.merge(metadata['response_group'], how='left', left_on='short_pt', right_index=True)

In [None]:
## Taken from doi: 10.1038/s41591-021-01323-8
# Signature genes: adjusted p <= 0.01 and average log fold change >= 0.5.
signature_filter = " pval_adjusted <= 0.01 & avg_logFC >= 0.5 "

dc_table = pd.read_csv('/Users/gouink/Documents/GeneLists/bassez_suppdata8.csv', index_col=None, header=0)
dc_sig = dc_table.query(signature_filter)['Gene'].tolist()

mac_table = pd.read_csv('/Users/gouink/Documents/GeneLists/bassez_suppdata9.csv', index_col=None, header=0)
mac_sig = mac_table.query(signature_filter)['Gene'].tolist()

# score each signature on the normalised matrix; results land in adata.obs
for score_name, gene_list in (('dc_sig', dc_sig), ('mac_sig', mac_sig)):
    sc.tl.score_genes(adata, gene_list=gene_list, score_name=score_name, use_raw=False)

sc.pl.umap(adata, color=['dc_sig', 'mac_sig'], cmap='viridis', vmax='p99', ncols=3, show=False)

# copy single-gene expression into obs so it can be averaged per sample later
individual_markers = ['CD274', 'PDCD1LG2']

for marker in individual_markers:
    marker_col = f'{marker}_expr'
    adata.obs[marker_col] = adata[:, marker].X.toarray()

In [None]:
# Per-sample (cohort x treatment) means within clusters 07/08/09 only,
# plotted by treatment and response group: DC signature plus the single-gene columns.
tmp = adata[adata.obs.query(" leiden == '07' | leiden == '08' | leiden == '09' ").index].copy()

sample_keys = ['cohort', 'treatment']
df_mean = tmp.obs.groupby(by=sample_keys).mean(numeric_only=True)
metadata = (
    tmp.obs[sample_keys + ['response_group']]
    .drop_duplicates(subset=sample_keys)
    .set_index(sample_keys)
)
df_mean = df_mean.merge(metadata, how='left', left_index=True, right_index=True)

cats = ['dc_sig'] + [f'{x}_expr' for x in individual_markers]

order = ['Base', 'PD1', 'RTPD1']
horder = ['R1', 'R2', 'NR']
tab10 = plt.get_cmap('tab10')
palette = {'R1': tab10(0), 'R2': tab10(1), 'NR': tab10(2)}

fig, axs = plt.subplots(nrows=1, ncols=3,
                        sharex=True, sharey=False,
                        gridspec_kw={'hspace': 0.2, 'wspace': 0.5},
                        figsize=(6,2))

# arguments common to the box layer and the point layer
plot_df = df_mean.reset_index()
shared_kwargs = dict(data=plot_df, x='treatment', hue='response_group',
                     order=order, hue_order=horder, palette=palette)

for cat, ax in zip(cats, axs.flat):

    # white boxes with thin outlines, outlier markers hidden (fliersize=0)
    _= sns.boxplot(y=cat,
                   color='w',
                   boxprops={'linewidth': 0.25, 'facecolor': 'w'},
                   linewidth=0.25,
                   fliersize=0,
                   ax=ax,
                   **shared_kwargs)
    # individual samples drawn on top, dodged by response group
    _= sns.stripplot(y=cat, size=2, dodge=True, ax=ax, **shared_kwargs)
    _= ax.get_legend().remove()
    _= ax.grid(visible=False)

fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/myeloid_filtered/bassez_dc_sigs.pdf', bbox_inches='tight')

In [None]:
# Wilcoxon rank-sum test on per-sample dc_sig means at baseline: R1 versus NR
baseline_r1 = df_mean.query(" treatment == 'Base' & response_group == 'R1' ")
baseline_nr = df_mean.query(" treatment == 'Base' & response_group == 'NR' ")
x1 = baseline_r1['dc_sig'].to_numpy()
x2 = baseline_nr['dc_sig'].to_numpy()

scipy.stats.ranksums(x1, x2)

In [None]:
# Per-sample (cohort x treatment) means over every cluster except 07/08/09,
# plotted by treatment and response group: macrophage signature plus the single-gene columns.
tmp = adata[adata.obs.query(" leiden != '07' & leiden != '08' & leiden != '09' ").index].copy()

sample_keys = ['cohort', 'treatment']
df_mean = tmp.obs.groupby(by=sample_keys).mean(numeric_only=True)
metadata = (
    tmp.obs[sample_keys + ['response_group']]
    .drop_duplicates(subset=sample_keys)
    .set_index(sample_keys)
)
df_mean = df_mean.merge(metadata, how='left', left_index=True, right_index=True)

cats = ['mac_sig'] + [f'{x}_expr' for x in individual_markers]

order = ['Base', 'PD1', 'RTPD1']
horder = ['R1', 'R2', 'NR']
tab10 = plt.get_cmap('tab10')
palette = {'R1': tab10(0), 'R2': tab10(1), 'NR': tab10(2)}

fig, axs = plt.subplots(nrows=1, ncols=3,
                        sharex=True, sharey=False,
                        gridspec_kw={'hspace': 0.2, 'wspace': 0.5},
                        figsize=(6,2))

# arguments common to the box layer and the point layer
plot_df = df_mean.reset_index()
shared_kwargs = dict(data=plot_df, x='treatment', hue='response_group',
                     order=order, hue_order=horder, palette=palette)

for cat, ax in zip(cats, axs.flat):

    # white boxes with thin outlines, outlier markers hidden (fliersize=0)
    _= sns.boxplot(y=cat,
                   color='w',
                   boxprops={'linewidth': 0.25, 'facecolor': 'w'},
                   linewidth=0.25,
                   fliersize=0,
                   ax=ax,
                   **shared_kwargs)
    # individual samples drawn on top, dodged by response group
    _= sns.stripplot(y=cat, size=2, dodge=True, ax=ax, **shared_kwargs)
    _= ax.get_legend().remove()
    _= ax.grid(visible=False)

fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/myeloid_filtered/bassez_mac_sigs.pdf', bbox_inches='tight')

In [None]:
# Wilcoxon rank-sum test on per-sample CD274 means at baseline: R1 versus NR
baseline_r1 = df_mean.query(" treatment == 'Base' & response_group == 'R1' ")
baseline_nr = df_mean.query(" treatment == 'Base' & response_group == 'NR' ")
x1 = baseline_r1['CD274_expr'].to_numpy()
x2 = baseline_nr['CD274_expr'].to_numpy()

scipy.stats.ranksums(x1, x2)

In [None]:
# Baseline-only CD274 expression: mean per (cohort, Leiden cluster),
# shown per cluster and split by response group.
adata.obs['CD274_expr'] = adata[:, 'CD274'].X.toarray()

tmp = adata[adata.obs.query(" treatment == 'Base' ").index].copy()
group_keys = ['cohort', 'leiden']
df_mean = tmp.obs.groupby(by=group_keys).mean(numeric_only=True)
metadata = (
    tmp.obs[group_keys + ['response_group']]
    .drop_duplicates(subset=group_keys)
    .set_index(group_keys)
)
df_mean = df_mean.merge(metadata, how='left', left_index=True, right_index=True)

order = sorted(adata.obs['leiden'].unique().tolist())
horder = ['R1', 'R2', 'NR']
tab10 = plt.get_cmap('tab10')
palette = {'R1': tab10(0), 'R2': tab10(1), 'NR': tab10(2)}

fig, axs = plt.subplots(figsize=(4,2))

# arguments common to the box layer and the point layer
plot_df = df_mean.reset_index()
shared_kwargs = dict(data=plot_df, x='leiden', y='CD274_expr', hue='response_group',
                     order=order, hue_order=horder, palette=palette)

# white boxes with thin outlines, outlier markers hidden (fliersize=0)
_= sns.boxplot(color='w',
               boxprops={'linewidth': 0.25, 'facecolor': 'w'},
               linewidth=0.25,
               fliersize=0,
               ax=axs,
               **shared_kwargs)
# individual cohort values drawn on top, dodged by response group
_= sns.stripplot(size=2, dodge=True, ax=axs, **shared_kwargs)
_= axs.get_legend().remove()
_= axs.grid(visible=False)

fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/myeloid_filtered/CD274_by_leiden_response.pdf')


In [None]:
# Rank-sum tests of CD274 in cluster 11 against every other cluster,
# first within R1 samples, then within R2 samples.
tmp = df_mean.reset_index()

other_clusters = ['00','01','02','03','04','05','06','07','08','09','10']

# (reference query for cluster 11, query for the compared cluster) per response group
query_pairs = (
    (" leiden == '11' & response_group == 'R1' ", " leiden == @i & response_group == 'R1' "),
    (" leiden == '11' & response_group == 'R2' ", " leiden == @i & response_group == 'R2' "),
)

for reference_query, compared_query in query_pairs:
    for i in other_clusters:

        x1 = tmp.query(reference_query)['CD274_expr'].to_numpy()
        x2 = tmp.query(compared_query)['CD274_expr'].to_numpy()

        print(i, scipy.stats.ranksums(x1, x2))

In [None]:
# Within each cluster, compare CD274 between response groups pairwise and
# report only comparisons with p <= 0.10.
group_queries = {
    'R1': " leiden == @l & response_group == 'R1' ",
    'R2': " leiden == @l & response_group == 'R2' ",
    'NR': " leiden == @l & response_group == 'NR' ",
}
group_pairs = (('R1', 'R2'), ('R1', 'NR'), ('R2', 'NR'))

for l in ['00','01','02','03','04','05','06','07','08','09','10','11']:
    for first, second in group_pairs:
        x1 = tmp.query(group_queries[first])['CD274_expr'].to_numpy()
        x2 = tmp.query(group_queries[second])['CD274_expr'].to_numpy()
        _, pval = scipy.stats.ranksums(x1, x2)
        if pval <= 0.10:
            print(f"cluster{l}: {first} vs {second}: p={pval}")