In [None]:
import scanpy as sc
from glob import glob
import pandas as pd
import numpy as np
import seaborn as sns
import anndata
import scipy
import re
import os
import matplotlib
import math
import random
import itertools
from statannot import add_stat_annotation
from matplotlib import pyplot as plt
from matplotlib import rcParams
from matplotlib.legend import Legend
import matplotlib.gridspec as gridspec

import generalfunctions as gf
import populationfunctions as pf
import dgexfunctions as dgexfunc

%matplotlib inline
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 200)
pd.options.display.max_seq_items = 2000

sc.set_figure_params(scanpy=True, dpi=300, dpi_save=300, frameon=True, vector_friendly=True, fontsize=8, 
                         color_map='Dark2', format='pdf', transparent=True, ipython_format='png2x')

rcParams.update({'font.size': 8})
rcParams.update({'font.family': 'Helvetica'})
rcParams['pdf.fonttype'] = 42
rcParams['ps.fonttype'] = 42
rcParams['svg.fonttype'] = 'none'
rcParams['figure.facecolor'] = (1,1,1,1)

import warnings
warnings.filterwarnings("ignore")

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from IPython.core.display import display, HTML
display(HTML("""
<style>
#notebook-container {
    width: 100%
}
 
.code_cell {
   flex-direction: row !important;
}
 
.code_cell .input {
    width: 50%
}
 
.code_cell .output_wrapper {
    width: 50%
}
</style>
"""))

In [None]:
## load celltype data

# File names of the saved UMAP embedding and Leiden labels for each compartment.
embFiles = dict(tcell_filtered='umap_n-0055_md-0.80_s-2.28.npy',
                bcell_filtered='umap_n-0028_md-0.40_s-1.39.npy',
                myeloid_filtered='umap_n-0064_md-0.10_s-1.61.npy')

clustFiles = dict(tcell_filtered='scvi_cugraph_leiden_nbr100_res0.6.npy',
                  bcell_filtered='scvi_cugraph_leiden_nbr100_res0.6.npy',
                  myeloid_filtered='scvi_cugraph_leiden_nbr30_res0.8.npy')

# Compartment analysed by the rest of the notebook.
celltype = 'tcell_filtered'
path = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/scvi_outputs/'

adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/h5adfiles/PembroRT_immune_R100_final.h5ad')

# Keep only the barcodes stored for this compartment, in the stored order, so the
# embedding / cluster arrays loaded below line up row-for-row with `adata`.
BC = np.load(f'{path}/{celltype}/barcodes.npy', allow_pickle=True)
adata = adata[list(BC)].copy()
adata

emb = np.load(f'{path}/{celltype}/{embFiles[celltype]}')
clust = np.load(f'{path}/{celltype}/{clustFiles[celltype]}')

adata.obsm['X_umap'] = emb
adata.obs['leiden'] = list(map(str, clust))
adata.obs['leiden'] = adata.obs['leiden'].astype('category')

# Attach clinical metadata per cell via cohort <-> Patient_Number, bringing in only
# the columns that `adata.obs` does not already have.
metadata = pd.read_csv('/Users/gouink/Documents/RTPD1Manuscript/Human/PEMBRORT_CLINICAL_METADATA_FORSCSEQ_KHG20210624.csv',
                       index_col=None, header=0)
cols = [col for col in metadata.columns if col not in adata.obs.columns]
metadata = metadata[cols]
obs_flat = adata.obs.reset_index()
obs_flat = obs_flat.merge(metadata, left_on='cohort', right_on='Patient_Number', how='left')
adata.obs = obs_flat.set_index('index')
# adata.obs.to_csv('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/metadata_collection/'+celltype+'_obs.csv')

# Marker size for embedding plots, scaled inversely with the number of cells.
dotsize = 2 * (120000 / len(adata))

sc.pl.umap(adata, color='leiden', legend_loc='on data', size=dotsize, show=False)
# plt.savefig('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/umap_by_leiden.png',dpi=600,bbox_inches='tight')

# Snapshot the current matrix in `.raw`, then normalise each cell to 10,000 counts
# and log1p-transform `.X`.
adata.raw = adata
sc.pp.normalize_per_cell(adata, counts_per_cell_after=10000)
sc.pp.log1p(adata)

# dgex = dgexfunc.leiden_dgex(adata)
# dgex.to_csv('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/leiden_dgex.csv')

In [None]:
# Marker genes to overlay on the UMAP (one panel per gene, four panels per row).
GOI = [
    'CD3D', 'CD3E', 'NCR1', 'NKG7',
    'CD4', 'CD8B', 'CXCL13', 'HAVCR2',
    'TIGIT', 'PDCD1', 'FOXP3', 'CD40LG',
    'IL7R', 'TCF7', 'CCR7', 'MKI67',
]

# Colour by the normalised values in `.X` (use_raw=False), clipped at the 99th percentile.
sc.pl.umap(
    adata,
    color=GOI,
    use_raw=False,
    color_map='inferno',
    vmin=0,
    vmax='p99',
    ncols=4,
    size=dotsize,
)

In [None]:
## cluster dgex heatmap

# Load the differential-expression table stored for this compartment.
dgex = pd.read_csv(
    f'/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/{celltype}/leiden_dgex.csv',
    index_col=0,
    header=0,
)

# Heatmap grouped by Leiden cluster via the project helper pf.dgex_plot.
ax = pf.dgex_plot(
    adata,
    dgex=dgex,
    groupby='leiden',
    topn=50,
    pvalCutoff=0.05,
    fcCutoff=1,
    pctCutoff=0.3,
    use_FDR=True,
    dendro=False,
    plot_type='scanpy_heatmap',
    cmap='Blues',
    figsize=(2,4),
    fontsize=1,
)

# plt.savefig('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/leiden_dgex_heatmap.svg')

In [None]:
## boxplots of cluster percentages

# x axis: treatment (Base / PD1 / RTPD1); hue: pCR (R / NR); replicate unit: cohort.
fig, ax = pf.pct_boxplot(
    adata,
    groupby='leiden',
    rep='cohort',
    xcat='treatment',
    hcat='pCR',
    xorder=['Base', 'PD1', 'RTPD1'],
    horder=['R', 'NR'],
    show_stats=True,
    thresh=None,
)

plt.savefig(f'/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/{celltype}/leiden_pct_boxplot.svg')


In [None]:
## lineplots of cluster percentages

# Same grouping as the boxplot cell; `direction='less'` is forwarded to the helper's statistics.
fig, ax = pf.pct_lineplot(
    adata,
    groupby='leiden',
    rep='cohort',
    xcat='treatment',
    hcat='pCR',
    xorder=['Base', 'PD1', 'RTPD1'],
    horder=['R', 'NR'],
    show_stats=True,
    direction='less',
    thresh=None,
)

plt.savefig(f'/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/{celltype}/leiden_pct_lineplot.svg')


In [None]:
## lineplots of cluster percentages (pf.med_lineplot variant)
# NOTE(review): presumably the median-based counterpart of pf.pct_lineplot - confirm against populationfunctions.

fig, ax = pf.med_lineplot(
    adata,
    groupby='leiden',
    rep='cohort',
    xcat='treatment',
    hcat='pCR',
    xorder=['Base', 'PD1', 'RTPD1'],
    horder=['R', 'NR'],
    thresh=None,
)

plt.savefig(f'/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/{celltype}/leiden_med_lineplot.svg')


In [None]:
## cluster correlations

# Long table from the project helper; rows with pCR == 'unknown' are excluded.
df = pf.pct_df(adata, groupby='leiden', drop_na=True)
df = df.loc[df['pCR'] != 'unknown']

# Wide table (row = cohort, column = (treatment, leiden)), then Spearman correlation
# between every pair of columns.
df = (
    df.pivot(index='cohort', columns=['treatment', 'leiden'], values='percent')
      .astype(float)
      .corr(method='spearman')
)

st = sorted(adata.obs.leiden.unique().tolist())

# Annotation colours: treatment arm, and the per-cluster colours stored in adata.uns.
txColors = dict(Base='tab:blue', PD1='tab:green', RTPD1='tab:red')
clustColors = dict(zip(st, adata.uns['leiden_colors']))

# Each index entry is a (treatment, leiden) pair.
rowCL1 = [txColors[key[0]] for key in df.index]
rowCL2 = [clustColors[key[1]] for key in df.index]

sns.set_style("white", rc={"font.family":"Helvetica","axes.grid":True})
sns.set_context("paper", rc={"font.size":8,"axes.titlesize":8,"axes.labelsize":8,"font.family":"Helvetica","xtick.labelsize":8,"ytick.labelsize":8})

sns.clustermap(
    df,
    method='complete',
    row_cluster=True,
    col_cluster=True,
    dendrogram_ratio=(0.05, 0.05),
    cmap='RdBu_r',
    row_colors=[rowCL1, rowCL2],
    col_colors=[rowCL1, rowCL2],
    vmin=-1,
    vmax=1,
)

# NOTE(review): the following cell saves its figure to this same path and overwrites this file.
plt.savefig(f'/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/{celltype}/cluster_correlation_heatmap.svg')

In [None]:
## cluster correlations (manually ordered heatmap with significance-scaled cells)

# Long table from the project helper; rows with pCR == 'unknown' are excluded.
df = pf.pct_df(adata,groupby='leiden',drop_na=True)
df = df[(df.pCR!='unknown')]

# One row per cohort, one column per (treatment, leiden) pair. Cohorts missing any
# pair are dropped so that spearmanr (nan_policy='raise') never sees a NaN.
df = df.pivot(index='cohort',columns=['treatment','leiden'],values='percent')
df = df.dropna(how='any')

cols = df.columns.tolist()

# Pairwise Spearman correlation between columns, with the matching p-values.
corr,pval = scipy.stats.spearmanr(df.to_numpy(dtype=np.float64),axis=0,nan_policy='raise')

corr = pd.DataFrame(corr,index=cols,columns=cols)

# -log10(p); infinite values (p == 0) are clamped to the largest finite value.
pval = -1*np.log10(pval)
pval[np.isinf(pval)] = np.max(pval[~np.isinf(pval)])
pval = pd.DataFrame(pval,index=cols,columns=cols)

# Complete-linkage clustering of the correlation profiles (Euclidean distance between
# rows of `corr`); the dendrogram leaf order is applied to rows and columns alike.
d = scipy.spatial.distance.pdist(corr.to_numpy(dtype='float64'),metric='euclidean')
l = scipy.cluster.hierarchy.linkage(d,metric='euclidean',method='complete',optimal_ordering=True)
dn = scipy.cluster.hierarchy.dendrogram(l,no_plot=True)
order = dn['leaves']

corr = corr.iloc[order,order]
pval = pval.iloc[order,order]

st = sorted(adata.obs.leiden.unique().tolist())

# Annotation colours: treatment arm, and the per-cluster colours stored in adata.uns.
txColors = {'Base':'tab:blue',
           'PD1':'tab:green',
           'RTPD1':'tab:red'}
clustColors = {s:adata.uns['leiden_colors'][i] for i,s in enumerate(st)}

# Each index entry is a (treatment, leiden) pair.
rowCL1 = [txColors[key[0]] for key in corr.index]
rowCL2 = [clustColors[key[1]] for key in corr.index]


fig,axs = plt.subplots(nrows=2,ncols=1,figsize=(2,4))
gf.heatmap2(corr,cmap='RdBu_r',vmin=-1,vmax=1,cellsize=pval,square=True,cellsize_vmax=3,fontsize=2,rowcolors=[rowCL1,rowCL2],ax=axs[0])


# Draw the dendrogram by hand in the second axes from the computed link coordinates.
icoord = np.array(dn['icoord'] )
dcoord = np.array(dn['dcoord'] )

for xs, ys in zip(icoord, dcoord):
    _= axs[1].plot(xs, ys, color='k', linewidth=0.5)
# Axes.grid's `b` keyword was renamed to `visible` and later removed from Matplotlib;
# passing the flag positionally works on both old and new versions.
axs[1].grid(False)


# NOTE(review): this is the same output path as the clustermap cell above, so that file is overwritten.
plt.savefig('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/'+celltype+'/cluster_correlation_heatmap.svg')

In [None]:
## diffusion component embedding

# BC = np.load(path+'/'+celltype+'/barcodes.npy',allow_pickle=True)
# adata = adata[list(BC)].copy()

# adata = adata[(adata.obs.leiden!='00')&(adata.obs.leiden!='01')&(adata.obs.leiden!='07')]

# emb = np.load(path+'/'+celltype+'/diffmap_nbr100_filtered.npy',allow_pickle=True)
# adata.obsm['X_diffmap'] = emb

# %matplotlib notebook

# NOTE(review): the lines that load the diffusion map are commented out above, so this
# cell presumably relies on the loaded AnnData already carrying one - confirm.

# Pairs of diffusion components (rows of the grid) crossed with clusters (columns).
c = ['1,2','1,3','1,6','2,6','3,6','2,3']
st = adata.obs.leiden.unique().tolist()
forder = list(itertools.product(c, st))

sns.set_style("white", rc={"font.family":"Helvetica","axes.grid":False})
sns.set_context("paper", rc={"font.size":4,"axes.titlesize":4,"axes.labelsize":4,"font.family":"Helvetica","xtick.labelsize":4,"ytick.labelsize":4})
fig,axs = plt.subplots(nrows=len(c),ncols=len(st),sharex=True,sharey=True,figsize=(len(st),len(c)))

# One panel per (component pair, cluster); only the named cluster is highlighted.
for (components, group), ax in zip(forder, np.ravel(axs)):
    sc.pl.diffmap(adata,
                  color='leiden',
                  components=components,
                  groups=group,
                  projection='2d',
                  size=dotsize,
                  ax=ax,
                  title=group,
                  show=False)


## Literature Signatures

In [None]:
# Response-group table, indexed by its first column.
metadata = pd.read_csv(
    '/Users/gouink/Documents/RTPD1Manuscript/Submission-Cell-Aug2022/Tables/Supplementary_Table_CODEX_response_groups.csv',
    index_col=0,
    header=0,
)


def _short_patient_id(batch):
    """Derive the lookup key from a batch label.

    Returns the first three characters, except for labels containing 'h03',
    which additionally get 'T' plus the fifth character appended.
    """
    if 'h03' in batch:
        return f"{batch[:3]}T{batch[4]}"
    return batch[:3]


adata.obs['short_pt'] = adata.obs['batch'].apply(_short_patient_id)

# Left-join the response group onto every cell through the derived key.
# NOTE(review): re-running this cell merges `response_group` a second time and yields suffixed columns.
adata.obs = adata.obs.merge(
    metadata['response_group'],
    how='left',
    left_on='short_pt',
    right_index=True,
)

In [None]:
## Taken from doi: 10.1038/s41591-021-01323-8
cytotoxicity_module = ['PRF1', 'GZMB', 'GZMA', 'IFNG', 'NKG7', 'GNLY']
checkpoint_module = ['CTLA4', 'PDCD1', 'HAVCR2', 'ENTPD1', 'TIGIT', 'LAG3', 'BTLA']

# The third module is read from disk, one entry per line.
with open('/Users/gouink/Documents/GeneLists/bassez_suppdata6.csv', 'r') as handle:
    cd4_module = handle.read().splitlines()

# Score each module on the normalised matrix; the score lands in adata.obs under the module name.
module_genes = {
    'cytotoxicity_module': cytotoxicity_module,
    'checkpoint_module': checkpoint_module,
    'cd4_module': cd4_module,
}
for score_name, genes in module_genes.items():
    sc.tl.score_genes(adata, gene_list=genes, score_name=score_name, use_raw=False)

sc.pl.umap(adata,
           color=['cytotoxicity_module', 'checkpoint_module', 'cd4_module'],
           cmap='viridis',
           vmax='p99',
           ncols=3,
           show=False)

individual_markers = ['PDCD1', 'CTLA4', 'CXCL13', 'GZMB']

# Copy each marker's per-cell value from `.X` into an obs column named '<gene>_expr'.
for marker in individual_markers:
    adata.obs[f'{marker}_expr'] = adata[:, marker].X.toarray()

In [None]:
# tmp = adata[adata.obs.query(" leiden != '01' & leiden != '07' & leiden != '03' ").index].copy()
# tmp = adata[adata.obs.query(" leiden == '06' | leiden == '04' ").index].copy()

# Restrict to cells of clusters 02, 04 and 06.
selected_cells = adata.obs.query(" leiden == '02' | leiden == '06' | leiden == '04' ").index
tmp = adata[selected_cells].copy()

# A sample is one (cohort, treatment) combination.
sample_keys = ['cohort', 'treatment']

# Mean of every numeric obs column per sample.
df_mean = tmp.obs.groupby(by=sample_keys).mean(numeric_only=True)

# Response group per sample (first occurrence), indexed like df_mean.
metadata = (
    tmp.obs[sample_keys + ['response_group']]
       .drop_duplicates(subset=sample_keys)
       .set_index(sample_keys)
)
df_mean = df_mean.merge(metadata, how='left', left_index=True, right_index=True)

# Sum of the per-sample means of the individual marker columns.
marker_columns = [f'{x}_expr' for x in individual_markers]
df_mean['combined_individual'] = df_mean[marker_columns].sum(axis=1)

In [None]:
# 2 x 4 grid: one panel per signature / marker column of df_mean.
fig, axs = plt.subplots(nrows=2, ncols=4,
                        sharex=True, sharey=False,
                        gridspec_kw={'hspace': 0.2, 'wspace': 0.5},
                        figsize=(8,4))

cats = ['cytotoxicity_module', 'checkpoint_module', 'cd4_module', 'combined_individual']
cats = cats + [f'{x}_expr' for x in individual_markers]

order = ['Base', 'PD1', 'RTPD1']
horder = ['R1', 'R2', 'NR']
tab10 = plt.get_cmap('tab10')
# First three tab10 colours, assigned to R1, R2, NR in that order.
palette = {group: tab10(position) for position, group in enumerate(horder)}

# Arguments common to the box layer and the point layer.
plot_data = df_mean.reset_index()
shared_kwargs = dict(data=plot_data,
                     x='treatment',
                     hue='response_group',
                     order=order,
                     hue_order=horder,
                     palette=palette)

for cat, ax in zip(cats, axs.flat):

    # White-filled boxes with outliers hidden (fliersize=0) ...
    _= sns.boxplot(y=cat,
                   color='w',
                   boxprops={'linewidth': 0.25, 'facecolor': 'w'},
                   linewidth=0.25,
                   fliersize=0,
                   ax=ax,
                   **shared_kwargs)
    # ... overlaid with the individual samples, dodged by response group.
    _= sns.stripplot(y=cat,
                     size=2,
                     dodge=True,
                     ax=ax,
                     **shared_kwargs)
    _= ax.get_legend().remove()
    _= ax.grid(visible=False)

fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/tcell_filtered/bassez_tcell_sigs.pdf')

In [None]:
# Baseline samples of the two groups being compared (selected once, reused per column).
baseline_r1 = df_mean.query(" treatment == 'Base' & response_group == 'R1' ")
baseline_nr = df_mean.query(" treatment == 'Base' & response_group == 'NR' ")

# Wilcoxon rank-sum test, R1 vs NR at baseline, for every plotted column.
for cat in cats:
    x1 = baseline_r1[cat].to_numpy()
    x2 = baseline_nr[cat].to_numpy()

    print(cat, scipy.stats.ranksums(x1, x2))

## T-cell atlas annotations

In [None]:
## Taken from DOI: 10.1126/science.abe6474

# Gene panel used for the CD8 dot plot; list order determines the plotting order.
cd8_genes = [
    'TCF7', 'LEF1', 'CCR7', 'SELL', 'MAL', 'IL7R', 'GPR183', 'ZFP36L2',
    'CXCR4', 'ZNF683', 'CD52', 'HOPX', 'ID2', 'CXCR6', 'XCL1', 'XCL2',
    'TBX21', 'ASCL2', 'CX3CR1', 'KLRG1', 'KLRD1', 'TYROBP', 'KIR2DL3', 'KIR2DL1',
    'KIR3DL2', 'CD160', 'EOMES', 'TXK', 'KLRC1', 'KIR2DL4', 'GZMK', 'CXCR5',
    'CCR4', 'CD28', 'CXCR3', 'GZMH', 'CD27', 'HLA-DRB1', 'PDCD1', 'CXCL13',
    'LAYN', 'STAT1', 'IFIT1', 'ISG15', 'CCR1', 'SLC4A10', 'KLRB1', 'TMIGD2',
    'RORA', 'RORC', 'ZBTB16', 'IL26', 'IL17A', 'IL23R', 'NME1', 'NME2',
    'MND1', 'SPC24', 'MYB',
]

# Gene panel used for the CD4 dot plot; list order determines the plotting order.
cd4_genes = [
    'CD40LG', 'FOXP3', 'TCF7', 'LEF1', 'TXK', 'CCR7', 'SELL', 'MAL',
    'CXCR5', 'ADSL', 'IL16', 'IL7R', 'TNF', 'AREG', 'TIMP1', 'CREM',
    'CCL5', 'CAPG', 'GZMK', 'KLRG1', 'CX3CR1', 'TBX21', 'RORA', 'RORC',
    'CCR6', 'IL23R', 'IL22', 'IL17A', 'IL17F', 'IL26', 'TOX', 'TOX2',
    'IL21', 'CXCL13', 'GNG4', 'CD200', 'BCL6', 'ZBED2', 'CCL3', 'CCL4',
    'IFNG', 'GZMB', 'LAG3', 'HAVCR2', 'RTKN2', 'IL2RA', 'S1PR1', 'TNFRSF9',
    'CTLA4', 'LAYN', 'STAT1', 'IFIT1', 'IRF7', 'NME1', 'NME2', 'MND1',
    'SPC24', 'CCR4',
]

In [None]:
## Dot plot of CD3 / CD4 / CD8 genes per Leiden cluster:
## dot size = fraction of the cluster's cells with a non-zero value, colour = z-scored mean expression.
GOI = ['CD3D', 'CD3E', 'CD4', 'CD8A', 'CD8B']
clust = adata.obs['leiden'].unique()

dfList = []

for c in clust:
    # Cells of the current cluster, restricted to the genes of interest.
    sub = adata[adata.obs.query(" leiden == @c ").index, GOI]
    X = sub.X

    # Fraction of *this* cluster's cells with a non-zero entry per gene. The divisor
    # must be the size of cluster `c`; it was previously fixed to the size of cluster
    # '04', which gave wrong fractions (possibly > 1) for every other cluster.
    nnz = X.getnnz(axis= 0) / sub.n_obs
    expr = X.toarray().mean(axis= 0)

    # Build from a dict so `group` stays an integer column (stacking everything into
    # one float array turned it into 0.0, 1.0, ... and produced tick labels like 't0.0').
    dfList.append(pd.DataFrame({'nnz': nnz, 'expr': expr, 'group': int(c)},
                               index= GOI))

tmp = pd.concat(dfList).reset_index(names= 'names')

## z-score expression of each gene across clusters (sample standard deviation)
expr_by_gene = tmp.groupby('names')['expr']
tmp['expr_z'] = (tmp['expr'] - expr_by_gene.transform('mean')) / expr_by_gene.transform('std')


gene_dict = {k:v for v,k in enumerate(tmp['names'].unique())}
num_genes = len(gene_dict)
num_clust = len(tmp['group'].unique())

# Plot coordinates: gene position on y, cluster number on x.
tmp['y_cat'] = tmp['names'].map(gene_dict)
tmp['x_cat'] = tmp['group'].astype(int)

## Figure parameters
order = sorted(tmp['x_cat'].tolist())
cmap = 'RdBu_r'
vmin = -3
vmax = 3
smin = 0.25
smax = 0.9

## Set up figure layout
fig, axd = plt.subplot_mosaic(mosaic= [["main"]],
                              figsize= (2, 1),
                              layout= 'constrained')

## Make dotplot
_= sns.scatterplot(data= tmp,
                   x= 'x_cat',
                   y= 'y_cat',
                   hue= 'expr_z',
                   hue_norm= (vmin, vmax),
                   size= 'nnz',
                   size_norm= (smin, smax),
                   palette= cmap,
                   ax= axd['main'])

## Formatting
_= axd['main'].grid(visible= False)
_= axd['main'].set_ylabel('')
_= axd['main'].set_xlabel('')
_= axd['main'].set_xlim((-0.5, num_clust - 0.5))
_= axd['main'].set_ylim((-0.5, num_genes - 0.5))
_= axd['main'].set_yticks(np.arange(num_genes))
_= axd['main'].set_yticklabels(GOI, fontsize= 4)
_= axd['main'].set_xticks(np.arange(num_clust))
_= axd['main'].set_xticklabels([f't{x}' for x in sorted(tmp['group'].unique())], fontsize= 4)

## Deal with legend (only show the size legend, i.e. the entries after the 'nnz' title)
_= axd['main'].get_legend().set_visible(False)
h, l = axd['main'].get_legend_handles_labels()
# list.index replaces int(np.argwhere(...)): converting a non-0-d array to a Python
# int is deprecated in NumPy.
pval_patch = l.index('nnz') + 1
lg = axd['main'].legend(handles= h[pval_patch: ],
                        labels= l[pval_patch: ],
                        bbox_to_anchor= (1.05, 1),
                        frameon= False,
                        fontsize= 4,
                        title= 'fraction cells non-zero',
                        title_fontsize= 4)

## Add colorbar
cbar = fig.colorbar(matplotlib.cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin= vmin, vmax= vmax), cmap= cmap),
                    ax=axd['main'],
                    orientation= 'horizontal',
                    location= 'top',
                    ticks= [vmin, 0, vmax],
                    pad= 0.01,
                    shrink= 0.5,
                    aspect= 20)

_= cbar.ax.set_xticklabels([f"≤ {vmin}", "0", f"≥ {vmax}"], fontsize= 4)
_= cbar.ax.set_title('z-score expression', fontsize= 4, pad= 1)
_= cbar.ax.tick_params(which= 'both', length= 1.5, width= 0.5, pad= 1)
# NOTE(review): child index 6 is assumed to be the colorbar outline; this depends on the Matplotlib version.
_= cbar.ax.get_children()[6].set_linewidth(0.5)

fig.savefig(f'/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/{celltype}/cd3cd4cd8_leiden.pdf')

In [None]:
## CD8 genes
## Dot plot of the CD8 gene panel for clusters 2, 4 and 6:
## colour = log fold change, dot size = -log10(adjusted p-value).
df = pd.read_csv('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/tcell_filtered/leiden_dgex.csv', index_col= 0, header= 0)

# Panel genes present in the table, reversed so the first panel gene ends up at the top of the y axis.
GOI = [x for x in cd8_genes[::-1] if x in df['names'].tolist()]
tmp = df.query(" names in @GOI ").copy()
tmp = tmp.query(" group == 2 | group == 4 | group == 6 ").copy()

gene_dict = {k:v for v,k in enumerate(GOI)}
clust_dict = {k:v for v,k in enumerate(sorted(tmp['group'].unique()))}
num_genes = len(gene_dict)
num_clust = len(clust_dict)

# Plot coordinates: gene position on y, position of the cluster among the selected ones on x.
tmp['y_cat'] = tmp['names'].map(gene_dict)
tmp['x_cat'] = tmp['group'].map(clust_dict)

# -log10(adjusted p); non-finite results (e.g. p == 0 -> inf, or NaN) are replaced by
# the largest finite value so they remain plottable.
tmp['pvals_conv'] = -1 * np.log10(tmp['pvals_adj'])
finite = np.isfinite(tmp['pvals_conv'])
tmp.loc[~finite, 'pvals_conv'] = tmp.loc[finite, 'pvals_conv'].max()


## Figure parameters
order = sorted(tmp['x_cat'].tolist())
cmap = 'RdYlBu_r'
vmin = -5
vmax = 5
smin = 0
smax = 300

## Set up figure layout
fig, axd = plt.subplot_mosaic(mosaic= [["main"]],
                              figsize= (1.5, 10),
                              layout= 'constrained')

## Make dotplot
_= sns.scatterplot(data= tmp,
                   x= 'x_cat',
                   y= 'y_cat',
                   hue= 'logfoldchanges',
                   hue_norm= (vmin, vmax),
                   size= 'pvals_conv',
                   sizes= (0, 50),
                   size_norm= (smin, smax),
                   palette= cmap,
                   ax= axd['main'])

## Formatting
_= axd['main'].grid(visible= False)
_= axd['main'].set_ylabel('')
_= axd['main'].set_xlabel('')
_= axd['main'].set_xlim((-0.5, num_clust - 0.5))
_= axd['main'].set_ylim((-0.5, num_genes - 0.5))
_= axd['main'].set_yticks(np.arange(num_genes))
_= axd['main'].set_yticklabels(GOI, fontsize= 8)
_= axd['main'].set_xticks(np.arange(num_clust))
_= axd['main'].set_xticklabels([f't{x}' for x in sorted(tmp['group'].unique())], fontsize= 4)

## Deal with legend (only show pval-related legend, i.e. the entries after the 'pvals_conv' title)
_= axd['main'].get_legend().set_visible(False)
h, l = axd['main'].get_legend_handles_labels()
# list.index replaces int(np.argwhere(...)): converting a non-0-d array to a Python
# int is deprecated in NumPy.
pval_patch = l.index('pvals_conv') + 1
lg = axd['main'].legend(handles= h[pval_patch: ],
                        labels= l[pval_patch: ],
                        bbox_to_anchor= (1.05, 1),
                        frameon= False,
                        fontsize= 4,
                        title= '-log10(pval)',
                        title_fontsize= 4)

## Add colorbar
cbar = fig.colorbar(matplotlib.cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin= vmin, vmax= vmax), cmap= cmap),
                    ax=axd['main'],
                    orientation= 'horizontal',
                    location= 'top',
                    ticks= [vmin, 0, vmax],
                    pad= 0.01,
                    shrink= 0.5,
                    aspect= 20)

_= cbar.ax.set_xticklabels([f"≤ {vmin}", "0", f"≥ {vmax}"], fontsize= 4)
_= cbar.ax.set_title('log fold change', fontsize= 4, pad= 1)
_= cbar.ax.tick_params(which= 'both', length= 1.5, width= 0.5, pad= 1)
# NOTE(review): child index 6 is assumed to be the colorbar outline; this depends on the Matplotlib version.
_= cbar.ax.get_children()[6].set_linewidth(0.5)

fig.savefig(f'/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/{celltype}/zheng_cd8genes.pdf', bbox_inches='tight')


In [None]:
## CD4 genes
## Dot plot of the CD4 gene panel for clusters 2, 3 and 5:
## colour = log fold change, dot size = -log10(adjusted p-value).
df = pd.read_csv('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/tcell_filtered/leiden_dgex.csv', index_col= 0, header= 0)

# Panel genes present in the table, reversed so the first panel gene ends up at the top of the y axis.
GOI = [x for x in cd4_genes[::-1] if x in df['names'].tolist()]
tmp = df.query(" names in @GOI ").copy()
tmp = tmp.query(" group == 2 | group == 3 | group == 5 ").copy()

gene_dict = {k:v for v,k in enumerate(GOI)}
clust_dict = {k:v for v,k in enumerate(sorted(tmp['group'].unique()))}
num_genes = len(gene_dict)
num_clust = len(clust_dict)

# Plot coordinates: gene position on y, position of the cluster among the selected ones on x.
tmp['y_cat'] = tmp['names'].map(gene_dict)
tmp['x_cat'] = tmp['group'].map(clust_dict)

# -log10(adjusted p); non-finite results (e.g. p == 0 -> inf, or NaN) are replaced by
# the largest finite value so they remain plottable.
tmp['pvals_conv'] = -1 * np.log10(tmp['pvals_adj'])
finite = np.isfinite(tmp['pvals_conv'])
tmp.loc[~finite, 'pvals_conv'] = tmp.loc[finite, 'pvals_conv'].max()

## Figure parameters
order = sorted(tmp['x_cat'].tolist())
cmap = 'RdYlBu_r'
vmin = -5
vmax = 5
smin = 0
smax = 500

## Set up figure layout
fig, axd = plt.subplot_mosaic(mosaic= [["main"]],
                              figsize= (1.5, 10),
                              layout= 'constrained')

## Make dotplot
_= sns.scatterplot(data= tmp,
                   x= 'x_cat',
                   y= 'y_cat',
                   hue= 'logfoldchanges',
                   hue_norm= (vmin, vmax),
                   size= 'pvals_conv',
                   sizes= (0, 50),
                   size_norm= (smin, smax),
                   palette= cmap,
                   ax= axd['main'])

## Formatting
_= axd['main'].grid(visible= False)
_= axd['main'].set_ylabel('')
_= axd['main'].set_xlabel('')
_= axd['main'].set_xlim((-0.5, num_clust - 0.5))
_= axd['main'].set_ylim((-0.5, num_genes - 0.5))
_= axd['main'].set_yticks(np.arange(num_genes))
_= axd['main'].set_yticklabels(GOI, fontsize= 8)
_= axd['main'].set_xticks(np.arange(num_clust))
_= axd['main'].set_xticklabels([f't{x}' for x in sorted(tmp['group'].unique())], fontsize= 4)

## Deal with legend (only show pval-related legend, i.e. the entries after the 'pvals_conv' title)
_= axd['main'].get_legend().set_visible(False)
h, l = axd['main'].get_legend_handles_labels()
# list.index replaces int(np.argwhere(...)): converting a non-0-d array to a Python
# int is deprecated in NumPy.
pval_patch = l.index('pvals_conv') + 1
lg = axd['main'].legend(handles= h[pval_patch: ],
                        labels= l[pval_patch: ],
                        bbox_to_anchor= (1.05, 1),
                        frameon= False,
                        fontsize= 4,
                        title= '-log10(pval)',
                        title_fontsize= 4)

## Add colorbar
cbar = fig.colorbar(matplotlib.cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin= vmin, vmax= vmax), cmap= cmap),
                    ax=axd['main'],
                    orientation= 'horizontal',
                    location= 'top',
                    ticks= [vmin, 0, vmax],
                    pad= 0.01,
                    shrink= 0.5,
                    aspect= 20)

_= cbar.ax.set_xticklabels([f"≤ {vmin}", "0", f"≥ {vmax}"], fontsize= 4)
_= cbar.ax.set_title('log fold change', fontsize= 4, pad= 1)
_= cbar.ax.tick_params(which= 'both', length= 1.5, width= 0.5, pad= 1)
# NOTE(review): child index 6 is assumed to be the colorbar outline; this depends on the Matplotlib version.
_= cbar.ax.get_children()[6].set_linewidth(0.5)

fig.savefig(f'/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/{celltype}/zheng_cd4genes.pdf', bbox_inches='tight')


## CODEX CXCL13 expression

In [None]:
# Response-group table (indexed by its first column) and the CODEX AnnData object.
# NOTE: `adata` is rebound here, so the scRNA-seq object from the cells above is no longer available.
metadata = pd.read_csv(
    '/Users/gouink/Documents/RTPD1Manuscript/Submission-Cell-Aug2022/Tables/Supplementary_Table_CODEX_response_groups.csv',
    index_col=0,
    header=0,
)
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/Submission-Cell-Aug2022/Data/PembroRT_TNBC_immune_CODEX.h5ad')

# Attach every metadata column to each cell through its `patient` value.
adata.obs = adata.obs.merge(
    metadata,
    how='left',
    left_on='patient',
    right_index=True,
)

# Quick look at the object, its per-cell annotations and the measured channels.
adata
adata.obs
adata.var_names

In [None]:
## Per-patient CXCL13 intensity distributions: one row per response group,
## columns = CD8T / CD4T. Panel keys are '<celltype>_<response group>'.
mosaic = [['CD8T_R1','CD4T_R1'],
          ['CD8T_R2','CD4T_R2'],
          ['CD8T_NR','CD4T_NR']]

tab10 = plt.get_cmap('tab10')
palette = {'R1': tab10(0), 'R2': tab10(1), 'NR': tab10(2)}

# Patients listed group by group; each patient's curve takes its group's colour.
order = metadata.query(" response_group == 'R1' ").index.tolist()
order += metadata.query(" response_group == 'R2' ").index.tolist()
order += metadata.query(" response_group == 'NR' ").index.tolist()
palette2 = {x:palette[metadata.loc[x,'response_group']] for x in order}

fig, axd = plt.subplot_mosaic(mosaic= mosaic,
                              sharex= True,
                              sharey= False,
                              gridspec_kw= {'hspace':0.1, 'wspace':0.05},
                              layout= 'constrained',
                              figsize=(3,2))

for k, ax in axd.items():

    # Split the panel key into cell type and response group.
    c = k.split('_')[0]
    p = k.split('_')[1]
    # NOTE(review): `treatment == 1` selects a single treatment code; its meaning is not visible here.
    idx = adata.obs.query(" celltype == @c & response_group == @p & treatment == 1 ").index

    # Densify before taking the log: np.log10 cannot be applied to a scipy sparse
    # matrix, and the statistics cell below treats `.X` as sparse. Handling both
    # storage types keeps this cell working either way.
    intensity = adata[idx, 'CXCL13_membrane_mean'].X
    intensity = intensity.toarray() if scipy.sparse.issparse(intensity) else np.asarray(intensity)

    tmp = pd.DataFrame(np.log10(intensity),
                       index= idx,
                       columns= ['CXCL13'])
    tmp = tmp.merge(adata.obs, how='left', left_index= True, right_index= True)

    # One density curve per patient, each normalised on its own (common_norm=False).
    _= sns.kdeplot(data= tmp,
                   hue= 'patient',
                   x= 'CXCL13',
                   fill= False,
                   palette= palette2,
                   legend= False,
                   common_norm= False,
                   lw= 0.5,
                   ax= ax)

    # Dashed line at the median over all cells shown in the panel.
    m = tmp['CXCL13'].median()
    _= ax.axvline(x= m, lw=0.5, ls='--', c= palette[p])
    _= ax.set_ylabel('frequency', fontsize= 4)
    _= ax.set_xlabel('log10 CXCL13 intensity', fontsize= 4)
    _= ax.set_yticks([])
    _= ax.set_xticks(np.arange(5))
    _= ax.set_xticklabels([str(x) for x in np.arange(5)], fontsize= 4)
    _= ax.tick_params(axis= 'x', length=0.5, width=0.5)
    _= ax.set_xlim(0, 4)
    _= ax.grid(visible= False)

fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/tcell_filtered/codex_cxcl13expr.pdf')

In [None]:
## Compare per-patient mean CXCL13 intensity in CD8T cells between response groups.
# NOTE(review): `treatment == 1` selects a single treatment code; its meaning is not visible here.
idx = adata.obs.query(" celltype == 'CD8T' & treatment == 1 ").index

# `.X` may be stored sparse or dense; the plotting cell above applies np.log10 to it
# directly, which only works for dense data. Densify only when needed so this cell
# does not fail with "no attribute 'toarray'" on a dense matrix.
intensity = adata[idx, 'CXCL13_membrane_mean'].X
intensity = intensity.toarray() if scipy.sparse.issparse(intensity) else np.asarray(intensity)

tmp = pd.DataFrame(intensity,
                    index= idx,
                    columns= ['CXCL13'])
tmp = tmp.merge(adata.obs, how= 'left', left_index= True, right_index= True)

# One value per patient (mean over that patient's cells), annotated with the response group.
avg = pd.DataFrame(tmp.groupby(by=['patient'])['CXCL13'].mean())
avg = avg.merge(metadata, how='left', left_index= True, right_index= True)

# Wilcoxon rank-sum tests: R1 vs R2, then R1 vs NR (both results are displayed).
scipy.stats.ranksums(avg.query(" response_group == 'R1' ")['CXCL13'].to_numpy(),
                     avg.query(" response_group == 'R2' ")['CXCL13'].to_numpy())

scipy.stats.ranksums(avg.query(" response_group == 'R1' ")['CXCL13'].to_numpy(),
                     avg.query(" response_group == 'NR' ")['CXCL13'].to_numpy())