In [None]:
import re
import os
import scvi
import scipy
import pickle
import anndata
import logging
import warnings
import matplotlib
import celltypist
import scipy.stats
import scanpy as sc
import scrublet as scr
import pandas as pd
import numpy as np
import seaborn as sns

from matplotlib import pyplot as plt
from matplotlib import rcParams
from matplotlib.legend import Legend
import matplotlib.gridspec as gridspec

sc.set_figure_params(scanpy=True, dpi=300, dpi_save=300, frameon=True, vector_friendly=True, fontsize=8, 
                         color_map='Dark2', format='pdf', transparent=True, ipython_format='png2x')

warnings.simplefilter(action="ignore", category=FutureWarning)

scvi.settings.seed = 94705

%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
%config InlineBackend.figure_format='retina'

rcParams.update({'font.size': 8})
rcParams.update({'font.family': 'Helvetica'})
rcParams['pdf.fonttype'] = 42
rcParams['ps.fonttype'] = 42
rcParams['svg.fonttype'] = 'none'
rcParams['figure.facecolor'] = (1,1,1,1)

# Make IPython evaluate every top-level expression of a cell interactively
# (ast_node_interactivity = "all"), not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Project-local helper modules; below, this notebook calls pf.pct_df_faster,
# dgex.leiden_dgex and gf.heatmap2.
import populationfunctions as pf
import dgexfunctions as dgex
import generalfunctions as gf

# Root logger (no name passed to getLogger).
logger = logging.getLogger()

def gex_clustermap(tmp=None, GOI=None, groupby=None, use_raw=False,
                   agg_function='mean',
                   genes_on='rows',
                   cluster_method='complete',
                   row_cluster=True, col_cluster=True,
                   row_order=None, col_order=None,
                   standard_scale=False, zscore=True,
                   dendrogram_ratio=None,
                   cmap=None,
                   cbar=True, cbar_shrink=1.0, cbar_title='',
                   xlabel=None, ylabel=None, 
                   x_rotation=0, y_rotation=0,
                   xtick_fontsize=2, ytick_fontsize=2,
                   vmin=-3, vmax=3, figsize=None, fontsize=2):
    
    """
    Clustered heatmap of per-group aggregated gene expression.

    Expression of the genes in GOI is aggregated over the categories of
    ``groupby`` with ``agg_function`` and drawn with ``seaborn.clustermap``.
    Genes are placed on the ``genes_on`` axis and the groupby categories on
    the opposite axis.

    Parameters
    ----------
    tmp : AnnData
        Object providing ``X`` (or ``raw.X``), ``obs`` and ``var_names``.
    GOI : list of str, optional
        Genes to plot. Defaults to ``tmp.var_names``.
    groupby : str
        Column of ``tmp.obs`` to aggregate over.
    use_raw : bool
        Read expression from ``tmp.raw`` instead of ``tmp.X``.
    agg_function : {'mean', 'median', 'min', 'max'}
        Aggregation applied within each group.
    genes_on : str
        ``'rows'`` puts genes on the rows; any other value keeps them on the
        columns.
    cluster_method : str
        Linkage method passed to ``seaborn.clustermap`` as ``method``.
    row_cluster, col_cluster : bool
        Whether to cluster rows / columns.
    row_order, col_order : list, optional
        Explicit label order applied before plotting.
    standard_scale, zscore : bool
        Scaling applied along the gene axis. ``standard_scale`` takes
        precedence over ``zscore``; when both are False the aggregated values
        are plotted unscaled.
    dendrogram_ratio : tuple, optional
        Defaults to ``(0.3, 0.05)``.
    cmap : str, optional
        Defaults to ``'RdBu_r'`` when ``zscore`` is set, otherwise ``'Reds'``.
    cbar, cbar_shrink, cbar_title
        Colour bar visibility, size multiplier and title.
    xlabel, ylabel, x_rotation, y_rotation, xtick_fontsize, ytick_fontsize,
    fontsize
        Axis label and tick label styling for the heatmap.
    vmin, vmax : float
        Colour scale limits.
    figsize : tuple, optional
        Defaults to ``(2, 3)``.

    Returns
    -------
    seaborn.matrix.ClusterGrid, or None when ``agg_function`` is not one of
    the supported names (a message is printed in that case).
    """

    if GOI is None:
        GOI = tmp.var_names
    if figsize is None:
        figsize = (2,3)
    if dendrogram_ratio is None:
        dendrogram_ratio = (0.3,0.05)
    if xlabel is None:
        xlabel = ''
    if ylabel is None:
        ylabel = ''

    if agg_function not in ('mean', 'median', 'min', 'max'):
        print(f"Unknown agg_function {agg_function}.")
        return None

    # `.raw` of a sliced AnnData is only subset along obs, so the gene selection
    # has to be applied to the Raw object itself to obtain one column per GOI.
    source = tmp.raw[:, GOI] if use_raw else tmp[:, GOI]
    values = source.X
    if hasattr(values, 'toarray'):
        # Sparse matrices need densifying; dense arrays are used as they are.
        values = values.toarray()
    df = pd.DataFrame(values, index=tmp.obs.index, columns=GOI)

    df = df.merge(tmp.obs, how='left', left_index=True, right_index=True)

    # One row per groupby category, one column per gene.
    df = getattr(df.groupby(groupby), agg_function)(numeric_only=True)[GOI]

    # Axis along which seaborn scales: 1 = columns, 0 = rows (after transposing).
    norm_dim = 1
    if genes_on=='rows':
        df = df.T
        norm_dim = 0
        
    if row_order is not None:
        df = df.loc[row_order,:].copy()
    if col_order is not None:
        df = df.loc[:,col_order].copy()
    
    if not cbar:
        cbar_pos = None
    else:
        cbar_pos = (-0.02, 0.92, 0.05*cbar_shrink, 0.18*cbar_shrink)
    
    # NOTE(review): the middle tick is hard-coded at 0.50 rather than at the
    # vmin/vmax midpoint - confirm this is intended.
    cbar_kws = dict(ticks=[vmin, 0.50, vmax], orientation='horizontal')

    if cmap is None:
        if zscore:
            cmap = 'RdBu_r'
        else:
            cmap = 'Reds'

    # standard_scale wins over zscore; with neither, plot the raw aggregates
    # instead of leaving the grid undefined.
    scale_kws = {}
    if standard_scale:
        scale_kws['standard_scale'] = norm_dim
    elif zscore:
        scale_kws['z_score'] = norm_dim

    g = sns.clustermap(df,
                       method= cluster_method,
                       row_cluster= row_cluster,
                       col_cluster= col_cluster,
                       dendrogram_ratio= dendrogram_ratio,
                       cmap= cmap,
                       vmin= vmin,
                       vmax= vmax,
                       cbar_kws= cbar_kws,
                       cbar_pos= cbar_pos,
                       xticklabels= df.columns,
                       yticklabels= df.index,
                       figsize= figsize,
                       **scale_kws)
    
    # Put one tick at the centre of every heatmap cell and restyle the labels.
    g.ax_heatmap.set_yticks(np.arange(df.shape[0])+0.5)
    g.ax_heatmap.set_xticks(np.arange(df.shape[1])+0.5)
    g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_yticklabels(), fontsize=ytick_fontsize, rotation=y_rotation)
    g.ax_heatmap.set_xticklabels(g.ax_heatmap.get_xticklabels(), fontsize=xtick_fontsize, rotation=x_rotation)
    g.ax_heatmap.set_xlabel(xlabel, fontsize=fontsize)
    g.ax_heatmap.set_ylabel(ylabel, fontsize=fontsize)
    g.ax_heatmap.tick_params(axis='both', length=1, width=0.5)
    g.ax_heatmap.grid(visible=False)
    
    if cbar:
        # Keep the requested origin but make the bar as wide as the row dendrogram.
        x0, y0, _w, _h = g.cbar_pos
        g.ax_cbar.set_position([x0, y0, g.ax_row_dendrogram.get_position().width*cbar_shrink, 0.02*cbar_shrink])
        g.ax_cbar.set_title(cbar_title, fontdict={'fontsize': fontsize/2})
        g.ax_cbar.tick_params(axis='x', labelsize=fontsize/4, length=0.5, width=0.5)
    
    return g

In [None]:
# Per-sample QC: read each 10x filtered matrix, annotate it, filter barcodes and
# write the result as an .h5ad file next to the input.
h5files = !ls /common/ingn/SCRNA_RAW_DATA/mouse_RTPD/*.h5

# Sample ID -> treatment arm.
sample_treatment = {
    'B1': 'IgG',
    'B2': 'PD1',
    'B3': 'PD1',
    'B4': 'RTIgG',
    'B5': 'RTPD1',
    'B6': 'RTPD1',
    'C4': 'RTIgG',
    'C5': 'RTPD1',
    'mA1': 'IgG',
    'mB1': 'PD1',
    'mC1': 'PD1',
    'mD1': 'RTIgG',
    'mE1': 'RTPD1',
    'mF1': 'RTPD1',
}

# Barcodes are kept when their mitochondrial fraction is below MAX_MITO and they
# have more than MIN_GENES detected genes (both comparisons are strict).
MAX_MITO = 0.1
MIN_GENES = 100
for f in h5files:
    base = f.split('/')[-1]
    # Same directory, with the 10x suffix replaced by '.h5ad'.
    outf = f.replace(base, base.replace('_filtered_feature_bc_matrix.h5', '.h5ad'))
    # The sample ID is the second '_'-delimited token of the file name.
    sample_id = base.split('_')[1]
    # IDs containing an 'm' are assigned to Cohort2, all others to Cohort1.
    cohort = 'Cohort1' if 'm' not in sample_id else 'Cohort2'
    print(base, outf, sample_id, sample_treatment[sample_id], cohort)
    
    ad = sc.read_10x_h5(f)
    ad.var_names_make_unique()
    
    ad.obs['sampleID'] = sample_id
    ad.obs['cohort'] = cohort
    ad.obs['treatment'] = sample_treatment[sample_id]
    # Prefix barcodes with the sample ID and drop everything after the first '-'.
    ad.obs_names = [f'{sample_id}_'+x.split('-')[0] for x in ad.obs_names]
    
    mito_genes = ad.var_names.str.startswith('mt-')
    # Bare expression; its value is not stored or used by the following lines.
    ad.var_names[mito_genes]
    # Despite the name this is a fraction (mito counts / total counts), which is
    # why it is compared against MAX_MITO = 0.1.
    ad.obs['percent_mito'] = np.sum(ad[:, mito_genes].X, axis=1).A1 / np.sum(ad.X, axis=1).A1

    sc.pp.calculate_qc_metrics(ad, inplace=True)

    # Upper count cutoff: 99th percentile of total_counts within this sample.
    max_counts = int(np.quantile(ad.obs.total_counts, 0.99))

    passing_barcodes = (ad.obs.query('percent_mito < @MAX_MITO')
                        .query('n_genes_by_counts > @MIN_GENES')
                        .query('total_counts < @max_counts')
                       ).index.tolist()
    print(ad.shape, len(passing_barcodes))
    
    ad = ad[passing_barcodes].copy()
    ad.write(outf)

In [None]:
# Doublet scoring: run Scrublet on every per-sample .h5ad file and write the scores
# to '<sample>_scrublet.csv' next to it.
import tqdm.auto as tqdm

h5adfiles = !ls /common/ingn/SCRNA_RAW_DATA/mouse_RTPD/*.h5ad
h5adfiles

for h5adf in tqdm.tqdm(h5adfiles):
    outf = h5adf.replace('.h5ad', '_scrublet.csv')
    print(h5adf, outf)
    
    ad = sc.read_h5ad(h5adf)
    sc.pp.normalize_total(ad, target_sum=10000)
    sc.pp.log1p(ad)
    # Number of highly variable genes = 10% of the genes in this sample.
    sc.pp.highly_variable_genes(ad, n_top_genes=int(ad.shape[-1] * 0.1))
    
    # Dense matrix of log-normalised values restricted to the highly variable genes.
    # NOTE(review): Scrublet's documentation describes its input as a raw count
    # matrix; here it receives log-normalised values (with log_transform=False
    # below) - confirm this is intended.
    mat = ad[:,ad.var['highly_variable'].values].X.toarray()

    scrubber = scr.Scrublet(mat, expected_doublet_rate=0.1, sim_doublet_ratio=3, stdev_doublet_rate=0.025)

    scores, predicted = scrubber.scrub_doublets(min_counts=2, min_cells=10, min_gene_variability_pctl=0,
                                                n_prin_comps=30, log_transform=False, mean_center=True, 
                                                synthetic_doublet_umi_subsampling=1., 
                                                use_approx_neighbors=False,
                                                verbose=True
                                               )

    # 'called_doublet' is Scrublet's own call; 'rethresholded_doublets' applies a
    # fixed score cutoff of 0.25.
    scrublet_results = pd.DataFrame({'scrublet_score': scores, 'called_doublet': predicted, 'rethresholded_doublets': scores>0.25},
                                    index=ad.obs_names
                                   )
    
    scrublet_results.to_csv(outf)
    
    # Drop the per-sample objects before the next iteration.
    del ad
    del mat
    del scrubber

In [None]:
# Remove doublets from every sample and concatenate the samples into one object.
#
# Each Scrublet CSV path is derived from its .h5ad path (the same rule that was used
# when the CSVs were written) instead of zipping two independently sorted directory
# listings, so a sample can never be paired with another sample's scores.
scrublet_files = [hf.replace('.h5ad', '_scrublet.csv') for hf in h5adfiles]

ads = []
for hf, f in zip(h5adfiles, scrublet_files):
    df = pd.read_csv(f, index_col=0, header=0)
    
    ad = sc.read_h5ad(hf)
    # Index-aligned assignment: scores are matched to cells by barcode.
    ad.obs['scrublet_score'] = df['scrublet_score']
    
    # Keep the cells whose score did not exceed the fixed re-threshold.
    non_doublets = df.query('rethresholded_doublets==False').index.tolist()
    
    ad = ad[non_doublets].copy()
    ads.append(ad)
    
    print(f, ad.shape)

adata = anndata.concat(ads)
adata
adata.obs.head()

adata.write('RTPD_mouse_scSeq.h5ad')

In [None]:
# Load the combined dataset and keep the full-gene object in .raw before subsetting.
adata = sc.read_h5ad("/Users/gouink/Documents/RTPD1Manuscript/mouse-scSeq/mouse/RTPD_mouse_scSeq.h5ad")
adata.raw = adata

# Subset to the 4000 top highly variable genes and expose the subset matrix as the
# 'counts' layer that scVI is registered against.
sc.pp.highly_variable_genes(adata, flavor='seurat_v3', n_top_genes=4000, subset=True)
adata.layers['counts'] = adata.X.copy()
scvi.model.SCVI.setup_anndata(adata, layer="counts")

# Model architecture.
other_params = {
    'dropout_rate': 0.2,
    'n_latent': 32,
    'n_hidden': 256,
    'n_layers': 2,
    'gene_likelihood': 'nb',
}

# Training-plan and trainer settings (early stopping on validation reconstruction loss).
plan_params = dict(lr=1e-3, n_epochs_kl_warmup=2)
trainer_params = dict(
    check_val_every_n_epoch=1,
    early_stopping=True,
    early_stopping_patience=10,
    early_stopping_monitor='reconstruction_loss_validation',
)

vae_ref = scvi.model.SCVI(adata, **other_params)
vae_ref.train(max_epochs=200, train_size=0.9, plan_kwargs=plan_params, **trainer_params)

# Neighbours, clustering and UMAP are all computed from the scVI latent space.
adata.obsm["X_scVI"] = vae_ref.get_latent_representation()
sc.pp.neighbors(adata, use_rep="X_scVI", key_added='scvi')
sc.tl.leiden(adata, key_added='leiden', neighbors_key='scvi', resolution=0.6)
sc.tl.umap(adata, neighbors_key='scvi')

sc.pl.umap(adata, color=["leiden"], frameon=False, legend_loc='on data', ncols=1)

adata.write('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/combined_RTPDv4_scvi.h5ad')

In [None]:
# Annotate cells with celltypist, collapse the labels into broad cell types and save
# both label columns back onto the scVI object.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/combined_RTPDv4_scvi.h5ad')

# Working copy built from the full-gene matrix stored in .raw, sharing obs and obsm.
test = anndata.AnnData(adata.raw.X, var=adata.raw.var, obs=adata.obs)
test.obsm = adata.obsm

# normalize_total replaces the deprecated normalize_per_cell and matches the
# normalisation call used for the Scrublet input earlier in this notebook.
sc.pp.normalize_total(test, target_sum=10000)
sc.pp.log1p(test)

model = celltypist.models.Model.load(model = 'Adult_Mouse_Gut.pkl')
predictions = celltypist.annotate(test, model = model, majority_voting = True)

test.obs['majority_voting'] = predictions.predicted_labels['majority_voting']

sc.pl.umap(
    test,
    color=["leiden", "majority_voting"],
    frameon=False,
    legend_loc= 'on data',
    ncols=1
)

# Marker panel shared by both dot plots in this cell.
genes = [
            'Mki67',
            'Epcam',  'Kdr', 'Hbb-bs', 'Vwf',
            'Ptprc', 
            'Ms4a1', 'Cd79a', 
            'Flt3', 'Irf8', 'H2-Eb1', 'Cd274', 'Itgax', 'Ccr7', 'Ly75', 'Il7r', 'Itgae', 'Kit', 'Havcr2', 
            'Vim', 'Pdgfrb', 'Col6a1', 'Col4a1', 'Col4a2', 'Acta2', 'Sdc1',
            'Cd68', 'Itgam',  'C1qa', 'C1qb', 'Trem2', 
            'Nkg7', 'Klrd1', 'Klrk1', 'Prf1', 'Ifng', 'Eomes', 'Runx3',
            'Cd3e', 'Cd3d', 'Cd4', 'Cd8a', 'Pdcd1', 'Foxp3', 'Ctla4', 
            'Csf3r', 'S100a8', 'Cd14', 
            'Ly6d', 'Sell', 'Cd69',  'Ly6c2', 'Ly6g', 'Adgre1', 'Ighg1', 'Tnfrsf17'
        ]
sc.pl.dotplot(test, var_names=genes, groupby='majority_voting', standard_scale='var')

test.obs['majority_voting'].to_csv("/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/combined_RTPDv4_celltypist.csv")

# celltypist label -> broad lineage; 'toss' marks labels that are filtered out by
# the downstream composition analysis.
celltypist_to_broad = {
                            "Cd206 Mac": "Myeloid",
                            "Inflammatory Monocytes": "Myeloid",
                            "CD8+ T cell": "T/NK cell",
                            "DC (CD103+ CD11-b)": "Myeloid",
                            "Resting B cell": "B cell",
                            "DC": "Myeloid",
                            "DC (CD103+ CD11+b)": "Myeloid",
                            "Ccr7 DC": "Myeloid",
                            "Activated CD4+ T cell": "T/NK cell",
                            "Ly6c2 Mono": "Myeloid",
                            "Clec4e mono": "Myeloid",
                            "Neutrophil": "Myeloid",
                            "pDC": "Myeloid",
                            "NK cell": "T/NK cell",
                            "Resting CD4+ T cells": "T/NK cell",
                            "DC (CD103- C2)": "Myeloid",
                            "Monocytes": "Myeloid",
                            "Macrophage": "Myeloid",
                            "Cd11c Mac": "Myeloid",
                            "Fibroblast": "toss",
                            "capillary Aqp7+": "toss",
                            "EarlyGC_1": "toss",
                            "EarlyGC_2": "toss",
                            "Plasma cell": "Plasma cell"
                      }

# Labels missing from the mapping become NaN in broad_celltype; report them rather
# than letting them pass silently.
unmapped_labels = sorted(set(test.obs['majority_voting'].unique()) - set(celltypist_to_broad))
if unmapped_labels:
    print(f"celltypist labels without a broad_celltype mapping (left as NaN): {unmapped_labels}")

test.obs['broad_celltype'] = pd.Categorical(test.obs['majority_voting'].map(celltypist_to_broad))
test.obs['broad_celltype'].value_counts()

sc.pl.umap(
    test,
    color=["leiden", "broad_celltype"],
    frameon=False,
    legend_loc= 'on data',
    ncols=1
)

sc.pl.dotplot(test, var_names=genes, groupby='broad_celltype', standard_scale='var')

adata.obs['majority_voting'] = test.obs['majority_voting'].loc[adata.obs.index]
adata.obs['broad_celltype'] = test.obs['broad_celltype'].loc[adata.obs.index]
adata.write('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/combined_RTPDv4_scvi_celltypist.h5ad')

In [None]:
# Composition of broad cell types per sample, grouped by treatment arm.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/combined_RTPDv4_scvi_celltypist.h5ad')
kept_cells = adata.obs.query(" broad_celltype != 'toss' ").index
adata = adata[kept_cells].copy()
tmp = adata.copy()

groupby = 'broad_celltype'

# Sample ID -> treatment arm with responder status.
tx_map = {
    'B1': 'IgG', 'B2': 'PD1_nonresp', 'B3': 'PD1_resp',
    'B4': 'RTIgG', 'B5': 'RTPD1_nonresp', 'B6': 'RTPD1_resp',
    'C4': 'RTIgG', 'C5': 'RTPD1_nonresp',
    'mA1': 'IgG', 'mB1': 'PD1_nonresp', 'mC1': 'PD1_resp',
    'mD1': 'RTIgG', 'mE1': 'RTPD1_nonresp', 'mF1': 'RTPD1_resp',
}

tmp.obs['treatment'] = tmp.obs['treatment'].astype(str)
tmp.obs['treatment_response'] = tmp.obs['sampleID'].apply(tx_map.__getitem__)

counts = pf.pct_df_faster(tmp, groupby=groupby, rep='sampleID', xcat='treatment')
counts

order = sorted(adata.obs[groupby].unique().tolist())
horder = ['IgG', 'PD1', 'RTIgG', 'RTPD1']

fig,axs = plt.subplots(figsize=(10,3))

# Drawn on the current axes, i.e. the one just created.
box_kws = dict(x=groupby, y='percent', hue='treatment', order=order, hue_order=horder)
_ = sns.boxplot(data=counts, **box_kws)


## Myeloid

In [None]:
# Myeloid reclustering.
# The commented-out block below subsets the annotated object to
# broad_celltype == 'Myeloid', trains scVI on 2000 HVGs and builds the
# 'myeloid_scvi' neighbour graph.
# NOTE(review): presumably this is how myeloid_reclustered.h5ad was first produced;
# the active code only reloads that file - confirm before deleting the block.
# adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/combined_RTPDv4_scvi_celltypist.h5ad')
# adata = adata[adata.obs.query(" broad_celltype == 'Myeloid' ").index, :].copy()
# adata.raw = adata

# sc.pp.highly_variable_genes(adata, n_top_genes=2000, subset=True, flavor='seurat_v3')

# adata.layers['counts'] = adata[:, adata.var_names].X

# scvi.model.SCVI.setup_anndata(adata, 
#                               layer= "counts")

# other_params = dict(
#     dropout_rate= 0.2,
#     n_latent=32,
#     n_hidden=256,
#     n_layers= 2,
#     gene_likelihood='nb'
# )

# plan_params = {
#                 'lr': 1e-3,
#                 'n_epochs_kl_warmup': 2
#               }

# trainer_params = {
#                     'check_val_every_n_epoch': 1,
#                     'early_stopping': True,
#                     'early_stopping_patience': 10,
#                     'early_stopping_monitor': 'reconstruction_loss_validation', 
#                  }

# vae_ref = scvi.model.SCVI(adata, **other_params)
# vae_ref.train(max_epochs= 200,
#               train_size= 0.9,
#               plan_kwargs= plan_params,
#               **trainer_params)

# adata.obsm["X_scVI"] = vae_ref.get_latent_representation()
# sc.pp.neighbors(adata, use_rep="X_scVI", key_added='myeloid_scvi')
# Active path: reload the saved object and recompute Leiden clusters (resolution 0.4)
# on the stored 'myeloid_scvi' neighbour graph; the UMAP is not recomputed.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/myeloid_reclustered.h5ad')
sc.tl.leiden(adata, key_added='leiden', neighbors_key='myeloid_scvi', resolution=0.4)
# sc.tl.umap(adata, neighbors_key='myeloid_scvi')

sc.pl.umap(
    adata,
    color=["leiden", "majority_voting"],
    legend_loc= 'on data',
    frameon=False,
    ncols=1
)

# Overwrites the file that was read at the top of this cell.
adata.write('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/myeloid_reclustered.h5ad')

In [None]:
# Composition of myeloid Leiden clusters per sample, grouped by treatment arm.
tmp = adata.copy()

groupby = 'leiden'

# Sample ID -> treatment arm with responder status.
tx_map = {
            'B1': 'IgG',
            'B2': 'PD1_nonresp',
            'B3': 'PD1_resp',
            'B4': 'RTIgG',
            'B5': 'RTPD1_nonresp',
            'B6': 'RTPD1_resp',
            'C4': 'RTIgG',
            'C5': 'RTPD1_nonresp',
            'mA1': 'IgG',
            'mB1': 'PD1_nonresp',
            'mC1': 'PD1_resp',
            'mD1': 'RTIgG',
            'mE1': 'RTPD1_nonresp',
            'mF1': 'RTPD1_resp'
        }

tmp.obs['treatment'] = tmp.obs['treatment'].astype(str)
tmp.obs['treatment_response'] = tmp.obs['sampleID'].apply(lambda x: tx_map[x])

counts = pf.pct_df_faster(tmp,
                          groupby= groupby,
                          rep= 'sampleID',
                          xcat= 'treatment')
counts

# Leiden labels are numeric strings: sort them as integers so that cluster '10'
# follows '9' instead of landing between '1' and '2'.
order = sorted(adata.obs[groupby].unique().tolist(), key=int)
horder = ['IgG', 'PD1', 'RTIgG', 'RTPD1']

fig,axs = plt.subplots(figsize=(10,3))

# Bind the plot to the axes created above rather than relying on the current axes.
_= sns.boxplot(data= counts,
                   x= groupby,
                   y= 'percent',
                   hue= 'treatment',
                   order= order,
                   hue_order= horder,
                   ax= axs)

In [None]:
# Differential expression on the myeloid object, once per grouping column.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/myeloid_reclustered.h5ad')
sc.pp.normalize_per_cell(adata, counts_per_cell_after=10000)
sc.pp.log1p(adata)

# (obs column to group by, CSV the result table is written to)
dgex_outputs = (
    ('leiden', '/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/myeloid_leiden_dgex.csv'),
    ('majority_voting', '/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/myeloid_majority_voting_dgex.csv'),
)
for grouping, csv_path in dgex_outputs:
    dg = dgex.leiden_dgex(adata, groupby=grouping)
    dg.to_csv(csv_path)

In [None]:
# Marker-gene UMAPs for the myeloid object, four panels per row, read from adata.X.
myeloid_markers = ['Cd14','Itgax','Mki67','Ly6c2','Adgre1','H2-Ab1','C1qa','Mrc1','Apoe','Trem2','Pltp']
sc.pl.umap(adata, color=myeloid_markers, ncols=4, size=3, cmap='inferno', use_raw=False, show=False)

In [None]:
# Violin plots of H2-Ab1 and C1qa by treatment in two myeloid populations.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/myeloid_reclustered.h5ad')
sc.pp.normalize_per_cell(adata, counts_per_cell_after=10000)
sc.pp.log1p(adata)

GOI = ["H2-Ab1", "C1qa"]

# One row per cell: expression of the two genes joined with the cell metadata.
df = pd.DataFrame(adata[:,GOI].X.toarray(), index=adata.obs_names, columns=GOI).merge(
    adata.obs, how='left', left_index=True, right_index=True
)

# Two stacked panels, one per gene, sharing the x axis.
fig, axd = plt.subplot_mosaic(
    mosaic=[["H2-Ab1"], ['C1qa']],
    sharex=True,
    sharey=False,
    gridspec_kw={'hspace': 0.05, 'wspace': 0.05},
    layout='constrained',
    figsize=(2,2),
)

y_ticks = np.arange(start=0, stop=10, step=2)
for g in GOI:
    ax = axd[g]
    sns.violinplot(
        data=df,
        x='majority_voting',
        y=g,
        hue='treatment',
        order=['Cd206 Mac', 'Ly6c2 Mono'],
        hue_order=['IgG', 'PD1', 'RTIgG', 'RTPD1'],
        linewidth=0.5,
        ax=ax,
    )
    ax.set_xlabel('')
    # NOTE(review): the second tick is labelled 'Inflammatory\nMonocytes' although the
    # plotted category is 'Ly6c2 Mono' - confirm the relabel is intended.
    ax.set_xticklabels(['Cd206\nMac', 'Inflammatory\nMonocytes'], fontsize=8)
    ax.set_ylabel('log norm\nexpression', fontsize=8)
    ax.set_title(g, fontsize=8)
    ax.set_yticks(y_ticks)
    ax.set_yticklabels([str(tick) for tick in y_ticks], fontsize=8)
    ax.grid(visible=False)
    ax.get_legend().remove()

fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_myeloid_violin.pdf')

In [None]:
# Composition of celltypist labels per sample, grouped by treatment arm.
tmp = adata.copy()

groupby = 'majority_voting'

# Sample ID -> treatment arm with responder status.
tx_map = {
    'B1': 'IgG', 'B2': 'PD1_nonresp', 'B3': 'PD1_resp',
    'B4': 'RTIgG', 'B5': 'RTPD1_nonresp', 'B6': 'RTPD1_resp',
    'C4': 'RTIgG', 'C5': 'RTPD1_nonresp',
    'mA1': 'IgG', 'mB1': 'PD1_nonresp', 'mC1': 'PD1_resp',
    'mD1': 'RTIgG', 'mE1': 'RTPD1_nonresp', 'mF1': 'RTPD1_resp',
}

tmp.obs['treatment'] = tmp.obs['treatment'].astype(str)
tmp.obs['treatment_response'] = tmp.obs['sampleID'].apply(tx_map.__getitem__)

counts = pf.pct_df_faster(tmp, groupby=groupby, rep='sampleID', xcat='treatment')
counts

order = sorted(adata.obs[groupby].unique().tolist())
horder = ['IgG', 'PD1', 'RTIgG', 'RTPD1']

fig,axs = plt.subplots(figsize=(10,3))

# Drawn on the current axes, i.e. the one just created.
box_kws = dict(x=groupby, y='percent', hue='treatment', order=order, hue_order=horder)
_ = sns.boxplot(data=counts, **box_kws)
# Long label names: rotate them to vertical.
_ = axs.set_xticklabels(order, rotation=90)

In [None]:
# Median percentage of 'Cd206 Mac' across samples in the IgG and RTIgG arms.
cd206_rows = counts[counts['majority_voting'] == 'Cd206 Mac']
cd206_rows.loc[cd206_rows['treatment'] == 'IgG', 'percent'].median()
cd206_rows.loc[cd206_rows['treatment'] == 'RTIgG', 'percent'].median()


In [None]:
# Ratio of two hand-entered numbers.
# NOTE(review): presumably these are the two 'Cd206 Mac' medians displayed by the
# previous cell; they will go stale if `counts` changes - confirm, or recompute the
# ratio from `counts` directly.
57.69736842105263 / 33.500781766545956

In [None]:
# Overlap between mouse myeloid Leiden-cluster signatures and human myeloid cluster
# signatures, compared through mouse -> human orthologs.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/combined_RTPDv4_scvi_celltypist.h5ad')
test = anndata.AnnData(adata.raw.X, var=adata.raw.var, obs=adata.obs)

# Gene universe: genes detected in at least 10% of the myeloid cells.
adata = test[test.obs.query(" broad_celltype == 'Myeloid' ").index].copy()
sc.pp.filter_genes(adata, min_cells=0.1*(len(adata)))
print(adata.shape)

tmp = pd.DataFrame(index= adata.var_names, columns=['mouse_name','human_name'])
tmp['mouse_name'] = adata.var_names

orth = pd.read_csv('/Users/gouink/Documents/GeneLists/Ensembl_Human_Mouse_Ortholog.txt',index_col=None,header=0)

# First listed human gene for every mouse gene, built once instead of querying the
# ortholog table separately for each gene.
first_ortholog = (orth.drop_duplicates(subset='Mousegenename', keep='first')
                      .set_index('Mousegenename')['Genename'])
tmp['human_name'] = tmp['mouse_name'].map(first_ortholog)

# Genes without a human ortholog are removed from the universe.
tmp.dropna(how='any',inplace=True)

all_mouse_genes = tmp['mouse_name'].tolist()
all_human_genes = tmp['human_name'].tolist()

mouse_dgex = pd.read_csv('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/myeloid_leiden_dgex.csv', index_col=0, header=0)
human_dgex = pd.read_csv('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/myeloid_filtered/leiden_dgex.csv', index_col=0, header=0)

olap_df = pd.DataFrame(index= [f'mouse_{x}' for x in sorted(mouse_dgex['group'].unique().tolist())],
                       columns= [f'human_{x}' for x in sorted(human_dgex['group'].unique().tolist())])


def _signature_genes(dgex_table, group):
    """Return the gene names passing the signature thresholds for one group."""
    keep = ((dgex_table['group'] == group)
            & (dgex_table['pvals_adj'] < 0.005)
            & (dgex_table['logfoldchanges'] >= 1.0)
            & (dgex_table['pct_nz_group'] >= 0.3))
    return dgex_table.loc[keep, 'names'].tolist()


mouse_universe = set(all_mouse_genes)
human_universe = set(all_human_genes)

# Human signatures do not depend on the mouse cluster: compute each of them once.
human_signatures = {}
for j in olap_df.columns:
    g = int(j.split('_')[-1])
    human_signatures[j] = set(_signature_genes(human_dgex, g)) & human_universe

for i in olap_df.index:

    g = int(i.split('_')[-1])
    mouse_genes = list(set(_signature_genes(mouse_dgex, g)) & mouse_universe)
    mouse_ortholog_genes = tmp.loc[mouse_genes, 'human_name'].tolist()
    num_mouse = len(mouse_ortholog_genes)
    mouse_ortholog_set = set(mouse_ortholog_genes)

    for j in olap_df.columns:

        human_genes = human_signatures[j]
        num_human = len(human_genes)

        overlap = len(mouse_ortholog_set & human_genes)
        # P(X >= overlap) under a hypergeometric null over the ortholog universe.
        pval = scipy.stats.hypergeom.sf(overlap-1, len(all_mouse_genes), num_mouse, num_human)

        # .at stores the list as a single cell value.
        olap_df.at[i,j] = [num_mouse, num_human, overlap, pval]

olap_df

# DataFrame.applymap is deprecated in newer pandas in favour of DataFrame.map.
elementwise = olap_df.map if hasattr(olap_df, 'map') else olap_df.applymap
# Fraction of the mouse signature that is shared, and -log10 of the p-value.
olap_size = elementwise(lambda x: x[2]/x[0] if x[0]!=0 else 0)
pval = elementwise(lambda x: -1*np.log10(x[3]))

# NOTE(review): cellsize is on a -log10 scale while ref_sizes are raw p-values -
# confirm against gf.heatmap2 that these are the units it expects.
gf.heatmap2(data= olap_size,
            vmin= 0,
            vmax= olap_size.max().max(),
            cellsize= pval,
            cmap= 'viridis',
            annot= pval,
            annot_kws= {'fontsize': 8, 'weight': 'normal'},
            ref_sizes= [1e-2, 1e-4, 1e-6],
            ref_labels= ['1e-2','1e-4','1e-6'],
            figsize= (8,8),
            fontsize= 12)

In [None]:
# Overlap between mouse myeloid celltypist-label signatures and human myeloid cluster
# signatures, compared through mouse -> human orthologs.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/combined_RTPDv4_scvi_celltypist.h5ad')
test = anndata.AnnData(adata.raw.X, var=adata.raw.var, obs=adata.obs)

# Gene universe: genes detected in at least 10% of the myeloid cells.
adata = test[test.obs.query(" broad_celltype == 'Myeloid' ").index].copy()
sc.pp.filter_genes(adata, min_cells=0.1*(len(adata)))
print(adata.shape)

tmp = pd.DataFrame(index= adata.var_names, columns=['mouse_name','human_name'])
tmp['mouse_name'] = adata.var_names

orth = pd.read_csv('/Users/gouink/Documents/GeneLists/Ensembl_Human_Mouse_Ortholog.txt',index_col=None,header=0)

# First listed human gene for every mouse gene, built once instead of querying the
# ortholog table separately for each gene.
first_ortholog = (orth.drop_duplicates(subset='Mousegenename', keep='first')
                      .set_index('Mousegenename')['Genename'])
tmp['human_name'] = tmp['mouse_name'].map(first_ortholog)

# Genes without a human ortholog are removed from the universe.
tmp.dropna(how='any',inplace=True)

all_mouse_genes = tmp['mouse_name'].tolist()
all_human_genes = tmp['human_name'].tolist()

mouse_dgex = pd.read_csv('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/myeloid_majority_voting_dgex.csv', index_col=0, header=0)
human_dgex = pd.read_csv('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/myeloid_filtered/leiden_dgex.csv', index_col=0, header=0)

olap_df = pd.DataFrame(index= [f'mouse_{x}' for x in sorted(mouse_dgex['group'].unique().tolist())],
                       columns= [f'human_{x}' for x in sorted(human_dgex['group'].unique().tolist())])


def _signature_genes(dgex_table, group):
    """Return the gene names passing the signature thresholds for one group."""
    keep = ((dgex_table['group'] == group)
            & (dgex_table['pvals_adj'] < 0.005)
            & (dgex_table['logfoldchanges'] >= 1.0)
            & (dgex_table['pct_nz_group'] >= 0.3))
    return dgex_table.loc[keep, 'names'].tolist()


mouse_universe = set(all_mouse_genes)
human_universe = set(all_human_genes)

# Human signatures do not depend on the mouse group: compute each of them once.
human_signatures = {}
for j in olap_df.columns:
    g = int(j.split('_')[-1])
    human_signatures[j] = set(_signature_genes(human_dgex, g)) & human_universe

for i in olap_df.index:

    # Strip only the 'mouse_' prefix: a cell-type label may itself contain
    # underscores, so taking the last '_'-separated token would truncate it.
    g = i[len('mouse_'):]
    mouse_genes = list(set(_signature_genes(mouse_dgex, g)) & mouse_universe)
    mouse_ortholog_genes = tmp.loc[mouse_genes, 'human_name'].tolist()
    num_mouse = len(mouse_ortholog_genes)
    mouse_ortholog_set = set(mouse_ortholog_genes)

    for j in olap_df.columns:

        human_genes = human_signatures[j]
        num_human = len(human_genes)

        overlap = len(mouse_ortholog_set & human_genes)
        # P(X >= overlap) under a hypergeometric null over the ortholog universe.
        pval = scipy.stats.hypergeom.sf(overlap-1, len(all_mouse_genes), num_mouse, num_human)

        # .at stores the list as a single cell value.
        olap_df.at[i,j] = [num_mouse, num_human, overlap, pval]

olap_df

# DataFrame.applymap is deprecated in newer pandas in favour of DataFrame.map.
elementwise = olap_df.map if hasattr(olap_df, 'map') else olap_df.applymap
# Fraction of the mouse signature that is shared, and -log10 of the p-value.
olap_size = elementwise(lambda x: x[2]/x[0] if x[0]!=0 else 0)
pval = elementwise(lambda x: -1*np.log10(x[3]))

# NOTE(review): cellsize is on a -log10 scale while ref_sizes are raw p-values -
# confirm against gf.heatmap2 that these are the units it expects.
gf.heatmap2(data= olap_size,
            vmin= 0,
            vmax= olap_size.max().max(),
            cellsize= pval,
            cmap= 'viridis',
            annot= pval,
            annot_kws= {'fontsize': 8, 'weight': 'normal'},
            ref_sizes= [1e-2, 1e-4, 1e-6],
            ref_labels= ['1e-2','1e-4','1e-6'],
            figsize= (8,8),
            fontsize= 12)

## T-cell

In [None]:
# T/NK reclustering: subset the annotated object, keep the full-gene object in .raw,
# then train scVI on the 2000 top highly variable genes.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/combined_RTPDv4_scvi_celltypist.h5ad')
tnk_cells = adata.obs.query(" broad_celltype == 'T/NK cell' ").index
adata = adata[tnk_cells, :].copy()
adata.raw = adata

sc.pp.highly_variable_genes(adata, flavor='seurat_v3', n_top_genes=2000, subset=True)
adata.layers['counts'] = adata.X.copy()
scvi.model.SCVI.setup_anndata(adata, layer="counts")

# Model architecture.
other_params = {
    'dropout_rate': 0.2,
    'n_latent': 32,
    'n_hidden': 256,
    'n_layers': 2,
    'gene_likelihood': 'nb',
}

# Training-plan and trainer settings (early stopping on validation reconstruction loss).
plan_params = dict(lr=1e-3, n_epochs_kl_warmup=2)
trainer_params = dict(
    check_val_every_n_epoch=1,
    early_stopping=True,
    early_stopping_patience=10,
    early_stopping_monitor='reconstruction_loss_validation',
)

vae_ref = scvi.model.SCVI(adata, **other_params)
vae_ref.train(max_epochs=200, train_size=0.9, plan_kwargs=plan_params, **trainer_params)

# Neighbours, clustering and UMAP are all computed from the scVI latent space.
adata.obsm["X_scVI"] = vae_ref.get_latent_representation()
sc.pp.neighbors(adata, use_rep="X_scVI", key_added='tcell_scvi')
sc.tl.leiden(adata, key_added='leiden', neighbors_key='tcell_scvi', resolution=0.3)
sc.tl.umap(adata, neighbors_key='tcell_scvi')

sc.pl.umap(adata, color=["leiden", "majority_voting"], frameon=False, ncols=1)

adata.write('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/tcell_reclustered.h5ad')

In [None]:
# Marker expression in the reclustered T/NK object: UMAPs for all cells, then violin
# plots per Leiden cluster for the CD8 and CD4 celltypist labels.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/tcell_reclustered.h5ad')

# Working copy built from the full-gene matrix stored in .raw, sharing obs and obsm.
test = anndata.AnnData(adata.raw.X, var=adata.raw.var, obs=adata.obs)
test.obsm = adata.obsm

sc.pp.normalize_per_cell(test, counts_per_cell_after=10000)
sc.pp.log1p(test)

umap_genes = ['Cd3e','Cd4','Cd8a','Ncr1','Gzma','Mki67','Foxp3','Ctla4','Tnfrsf4','Il10','Pdcd1','Lag3','Tcf7','Prf1','Gzmb']
_ = sc.pl.umap(test, color=umap_genes, cmap='inferno', size=20, use_raw=False, show=False, ncols=4)

# --- CD8 T cells: drop Leiden clusters with fewer than two cells, then plot ---
cd8_cells = test.obs.query(" majority_voting == 'CD8+ T cell' ").index
test2 = test[cd8_cells, :].copy()
cluster_sizes = test2.obs['leiden'].value_counts()
rm_clust = cluster_sizes.index[cluster_sizes < 2].tolist()
test2 = test2[test2.obs.query(" leiden not in @rm_clust ").index, :].copy()
# dg = dgex.leiden_dgex(test2, groupby='leiden')
# dg.to_csv('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/CD8T_leiden_dgex.csv')
cd8_genes = ['Cd3e','Cd4','Cd8a','Mki67','Pdcd1','Lag3','Tcf7','Prf1','Gzmb']
_ = sc.pl.violin(test2, keys=cd8_genes, groupby='leiden', order=['1','2','3','4'], use_raw=False, show=False)

# --- CD4 T cells (resting and activated): same filtering, different marker panel ---
cd4_cells = test.obs.query(" majority_voting == 'Resting CD4+ T cells' | majority_voting == 'Activated CD4+ T cell' ").index
test2 = test[cd4_cells, :].copy()
cluster_sizes = test2.obs['leiden'].value_counts()
rm_clust = cluster_sizes.index[cluster_sizes < 2].tolist()
test2 = test2[test2.obs.query(" leiden not in @rm_clust ").index, :].copy()
# dg = dgex.leiden_dgex(test2, groupby='leiden')
# dg.to_csv('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/CD4T_leiden_dgex.csv')
cd4_genes = ['Cd3e','Cd4','Cd8a','Mki67','Foxp3','Ctla4','Tnfrsf4','Il10','Pdcd1','Tcf7','Gata3','Il2ra','Il2rb']
_ = sc.pl.violin(test2, keys=cd4_genes, groupby='leiden', order=['0','1','3','4'], use_raw=False, show=False)

In [None]:
# Boxplot of per-sample cluster percentages, grouped by Leiden cluster and
# coloured by treatment. Works on a copy so `adata` itself is not modified.
tmp = adata.copy()
groupby = 'leiden'

# sampleID -> treatment/response label
tx_map = {
    'B1': 'IgG', 'B2': 'PD1_nonresp', 'B3': 'PD1_resp',
    'B4': 'RTIgG', 'B5': 'RTPD1_nonresp', 'B6': 'RTPD1_resp',
    'C4': 'RTIgG', 'C5': 'RTPD1_nonresp',
    'mA1': 'IgG', 'mB1': 'PD1_nonresp', 'mC1': 'PD1_resp',
    'mD1': 'RTIgG', 'mE1': 'RTPD1_nonresp', 'mF1': 'RTPD1_resp',
}

tmp.obs['treatment'] = tmp.obs['treatment'].astype(str)
tmp.obs['treatment_response'] = tmp.obs['sampleID'].apply(lambda sample: tx_map[sample])

# presumably returns one 'percent' row per (sampleID, cluster) — see pf.pct_df_faster
counts = pf.pct_df_faster(tmp, groupby=groupby, rep='sampleID', xcat='treatment')
counts

order = sorted(adata.obs[groupby].unique().tolist())
horder = ['IgG', 'PD1', 'RTIgG', 'RTPD1']

fig, axs = plt.subplots(figsize=(10, 3))

box_kwargs = dict(
    data=counts,
    x=groupby,
    y='percent',
    hue='treatment',
    order=order,
    hue_order=horder,
)
_ = sns.boxplot(**box_kwargs)
_ = axs.set_title("Normalized to total T-cells")

In [None]:
# Same percentage boxplot as the Leiden version, but grouped by the
# 'majority_voting' cell-type labels (x tick labels rotated to fit).
tmp = adata.copy()
groupby = 'majority_voting'

# sampleID -> treatment/response label
tx_map = {
    'B1': 'IgG', 'B2': 'PD1_nonresp', 'B3': 'PD1_resp',
    'B4': 'RTIgG', 'B5': 'RTPD1_nonresp', 'B6': 'RTPD1_resp',
    'C4': 'RTIgG', 'C5': 'RTPD1_nonresp',
    'mA1': 'IgG', 'mB1': 'PD1_nonresp', 'mC1': 'PD1_resp',
    'mD1': 'RTIgG', 'mE1': 'RTPD1_nonresp', 'mF1': 'RTPD1_resp',
}

tmp.obs['treatment'] = tmp.obs['treatment'].astype(str)
tmp.obs['treatment_response'] = tmp.obs['sampleID'].apply(lambda sample: tx_map[sample])

counts = pf.pct_df_faster(tmp, groupby=groupby, rep='sampleID', xcat='treatment')
counts

order = sorted(adata.obs[groupby].unique().tolist())
horder = ['IgG', 'PD1', 'RTIgG', 'RTPD1']

fig, axs = plt.subplots(figsize=(10, 3))

box_kwargs = dict(
    data=counts,
    x=groupby,
    y='percent',
    hue='treatment',
    order=order,
    hue_order=horder,
)
_ = sns.boxplot(**box_kwargs)
_ = axs.set_xticklabels(order, rotation=90)
_ = axs.set_title("Normalized to total T-cells")

In [None]:
# Leiden-cluster percentages within CD8+ T cells only.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/tcell_reclustered.h5ad')
cd8_cells = adata.obs.query(" majority_voting == 'CD8+ T cell' ").index
tmp = adata[cd8_cells, :].copy()

groupby = 'leiden'

# sampleID -> treatment/response label
tx_map = {
    'B1': 'IgG', 'B2': 'PD1_nonresp', 'B3': 'PD1_resp',
    'B4': 'RTIgG', 'B5': 'RTPD1_nonresp', 'B6': 'RTPD1_resp',
    'C4': 'RTIgG', 'C5': 'RTPD1_nonresp',
    'mA1': 'IgG', 'mB1': 'PD1_nonresp', 'mC1': 'PD1_resp',
    'mD1': 'RTIgG', 'mE1': 'RTPD1_nonresp', 'mF1': 'RTPD1_resp',
}

tmp.obs['treatment'] = tmp.obs['treatment'].astype(str)
tmp.obs['treatment_response'] = tmp.obs['sampleID'].apply(lambda sample: tx_map[sample])

counts = pf.pct_df_faster(tmp, groupby=groupby, rep='sampleID', xcat='treatment')
counts

# The x-axis order is taken from the full object, not from the CD8 subset.
order = sorted(adata.obs[groupby].unique().tolist())
horder = ['IgG', 'PD1', 'RTIgG', 'RTPD1']

fig, axs = plt.subplots(figsize=(10, 3))

box_kwargs = dict(
    data=counts,
    x=groupby,
    y='percent',
    hue='treatment',
    order=order,
    hue_order=horder,
)
_ = sns.boxplot(**box_kwargs)

_ = axs.set_title("Normalized to total CD8T")

In [None]:
# Leiden-cluster percentages within CD4+ T cells (resting + activated labels).
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/tcell_reclustered.h5ad')
cd4_cells = adata.obs.query(" majority_voting == 'Resting CD4+ T cells' | majority_voting == 'Activated CD4+ T cell' ").index
tmp = adata[cd4_cells, :].copy()

groupby = 'leiden'

# sampleID -> treatment/response label
tx_map = {
    'B1': 'IgG', 'B2': 'PD1_nonresp', 'B3': 'PD1_resp',
    'B4': 'RTIgG', 'B5': 'RTPD1_nonresp', 'B6': 'RTPD1_resp',
    'C4': 'RTIgG', 'C5': 'RTPD1_nonresp',
    'mA1': 'IgG', 'mB1': 'PD1_nonresp', 'mC1': 'PD1_resp',
    'mD1': 'RTIgG', 'mE1': 'RTPD1_nonresp', 'mF1': 'RTPD1_resp',
}

tmp.obs['treatment'] = tmp.obs['treatment'].astype(str)
tmp.obs['treatment_response'] = tmp.obs['sampleID'].apply(lambda sample: tx_map[sample])

counts = pf.pct_df_faster(tmp, groupby=groupby, rep='sampleID', xcat='treatment')
counts

# The x-axis order is taken from the full object, not from the CD4 subset.
order = sorted(adata.obs[groupby].unique().tolist())
horder = ['IgG', 'PD1', 'RTIgG', 'RTPD1']

fig, axs = plt.subplots(figsize=(10, 3))

box_kwargs = dict(
    data=counts,
    x=groupby,
    y='percent',
    hue='treatment',
    order=order,
    hue_order=horder,
)
_ = sns.boxplot(**box_kwargs)

_ = axs.set_title("Normalized to total CD4T")

## Final figures

In [None]:
# Final global UMAP panels: Leiden clusters, 'majority_voting' labels and
# broad cell types, each saved as its own 2x2 inch PDF.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/combined_RTPDv4_scvi_celltypist.h5ad')

# Keyword arguments shared by all three panels.
umap_common = dict(frameon=False, legend_loc='on data', ncols=1, show=False)

fig, axs = plt.subplots(figsize=(2, 2))
sc.pl.umap(adata, color=["leiden"], ax=axs, **umap_common)
fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_global_umap_leiden.pdf', bbox_inches='tight')

fig, axs = plt.subplots(figsize=(2, 2))
sc.pl.umap(adata, color=["majority_voting"], legend_fontsize=2, ax=axs, **umap_common)
fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_global_umap_celltypist.pdf', bbox_inches='tight')

fig, axs = plt.subplots(figsize=(2, 2))
sc.pl.umap(
    adata,
    color=["broad_celltype"],
    groups=['B cell', 'Myeloid', 'T/NK cell', 'Plasma cell'],
    ax=axs,
    **umap_common
)
fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_global_umap_celltypistBroad.pdf', bbox_inches='tight')


# test = anndata.AnnData(adata.raw.X, var=adata.raw.var, obs=adata.obs)
# test.obsm = adata.obsm

# sc.pp.normalize_per_cell(test, counts_per_cell_after=10000)
# sc.pp.log1p(test)

# genes = [
#             'Mki67',
#             'Epcam',  'Kdr', 'Hbb-bs', 'Vwf',
#             'Ptprc', 
#             'Ms4a1', 'Cd79a', 
#             'Flt3', 'Irf8', 'H2-Eb1', 'Cd274', 'Itgax', 'Ccr7', 'Ly75', 'Il7r', 'Itgae', 'Kit', 'Havcr2', 
#             'Vim', 'Pdgfrb', 'Col6a1', 'Col4a1', 'Col4a2', 'Acta2', 'Sdc1',
#             'Cd68', 'Itgam',  'C1qa', 'C1qb', 'Trem2', 
#             'Nkg7', 'Klrd1', 'Klrk1', 'Prf1', 'Ifng', 'Eomes', 'Runx3',
#             'Cd3e', 'Cd3d', 'Cd4', 'Cd8a', 'Pdcd1', 'Foxp3', 'Ctla4', 
#             'Csf3r', 'S100a8', 'Cd14', 
#             'Ly6d', 'Sell', 'Cd69',  'Ly6c2', 'Ly6g', 'Adgre1', 'Ighg1', 'Tnfrsf17'
#         ]

# fig,axs = plt.subplots(figsize=(4,4))
# sc.pl.dotplot(test, 
#               var_names=genes,
#               use_raw= False,
#               groupby='broad_celltype', 
#               standard_scale='var', 
#               dendrogram= True,
#               show= False,
#               ax= axs)
# fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/timecourseRT_v4/figures/timecourse_global_expr_dotplot.pdf')

In [None]:
# Myeloid UMAP panels (Leiden clusters and 'majority_voting' labels), with
# the legend placed in the right margin.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/myeloid_reclustered.h5ad')

# Keyword arguments shared by both panels.
umap_common = dict(
    frameon=False,
    legend_loc='right margin',
    legend_fontsize=8,
    ncols=1,
    show=False,
)

fig, axs = plt.subplots(figsize=(2, 2))
sc.pl.umap(adata, color=["leiden"], ax=axs, **umap_common)
fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_myeloid_umap_leiden.pdf', bbox_inches='tight')

fig, axs = plt.subplots(figsize=(2, 2))
sc.pl.umap(adata, color=["majority_voting"], ax=axs, **umap_common)
fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_myeloid_umap_celltypist.pdf', bbox_inches='tight')

In [None]:
# Myeloid cell-type percentages per sample: boxplot with the individual
# samples overlaid as a dodged stripplot, restricted to two cell types.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/myeloid_reclustered.h5ad')
tmp = adata.copy()

groupby = 'majority_voting'

# sampleID -> treatment/response label
tx_map = {
    'B1': 'IgG', 'B2': 'PD1_nonresp', 'B3': 'PD1_resp',
    'B4': 'RTIgG', 'B5': 'RTPD1_nonresp', 'B6': 'RTPD1_resp',
    'C4': 'RTIgG', 'C5': 'RTPD1_nonresp',
    'mA1': 'IgG', 'mB1': 'PD1_nonresp', 'mC1': 'PD1_resp',
    'mD1': 'RTIgG', 'mE1': 'RTPD1_nonresp', 'mF1': 'RTPD1_resp',
}

tmp.obs['treatment'] = tmp.obs['treatment'].astype(str)
tmp.obs['treatment_response'] = tmp.obs['sampleID'].apply(lambda sample: tx_map[sample])

counts = pf.pct_df_faster(tmp, groupby=groupby, rep='sampleID', xcat='treatment')
counts

# order = sorted(adata.obs[groupby].unique().tolist())
order = ['Cd206 Mac', 'Inflammatory Monocytes']
horder = ['IgG', 'PD1', 'RTIgG', 'RTPD1']

fig, axs = plt.subplots(figsize=(2, 3))

# Arguments common to the box and strip layers.
layer_kwargs = dict(
    data=counts.reset_index(),
    x=groupby,
    y='percent',
    hue='treatment',
    order=order,
    hue_order=horder,
)
_ = sns.boxplot(fliersize=0, linewidth=0.5, **layer_kwargs)
_ = sns.stripplot(dodge=True, s=3, legend=None, **layer_kwargs)

_ = axs.set_xticklabels(order, rotation=90)
_ = axs.set_ylim(0, 70)
_ = axs.get_legend().remove()
fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_myeloid_boxplot_celltypist.pdf', bbox_inches='tight')

In [None]:
# Per-cell-type chi-square comparison of cell counts between two treatments
# (t1 vs t2) in the myeloid object.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/myeloid_reclustered.h5ad')

# sampleID -> treatment (responder / non-responder samples pooled)
tx_map = {
    'B1': 'IgG', 'B2': 'PD1', 'B3': 'PD1',
    'B4': 'RTIgG', 'B5': 'RTPD1', 'B6': 'RTPD1',
    'C4': 'RTIgG', 'C5': 'RTPD1',
    'mA1': 'IgG', 'mB1': 'PD1', 'mC1': 'PD1',
    'mD1': 'RTIgG', 'mE1': 'RTPD1', 'mF1': 'RTPD1',
}

adata.obs['treatment'] = adata.obs['sampleID'].apply(lambda sample: tx_map[sample])

# Number of cells per (treatment, cell type).
counts = adata.obs.groupby(by=['treatment', 'majority_voting']).count()['sampleID']

t1 = 'IgG'
t2 = 'PD1'

obs_t1 = counts[t1, :]
obs_t2 = counts[t2, :]

total_per_cluster = obs_t1 + obs_t2
total = total_per_cluster.sum()

# Expected counts if each cell type made up the same fraction in both
# treatments. NOTE: the variable names are historical; f_exp_RTIgG holds the
# expectation for `t2`, whatever treatment that is set to.
pooled_fraction = total_per_cluster / total
f_exp_IgG = pooled_fraction * obs_t1.sum()
f_exp_RTIgG = pooled_fraction * obs_t2.sum()

# One row per cell type, one column per treatment; the test runs along each row.
observed = np.array([obs_t1, obs_t2]).T
expected = np.array([f_exp_IgG, f_exp_RTIgG]).T
result = scipy.stats.chisquare(observed, expected, axis=1)

result = pd.DataFrame(result.pvalue, index=obs_t1.index)
result

In [None]:
# Inspection cell: show the index of the 'IgG' slice of `counts`
# (relies on `counts` left in the kernel by a previous cell).
counts['IgG', :].index

In [None]:
# Inspection cell: show the index of the 'RTIgG' slice of `counts`
# (relies on `counts` left in the kernel by a previous cell).
counts['RTIgG', :].index

In [None]:
# Overlap between mouse myeloid cell-type markers and human myeloid cluster
# markers, via a mouse->human ortholog table. For every (mouse group, human
# group) pair the cell stores [n_mouse, n_human, n_overlap, hypergeometric p]
# and then draws a heatmap coloured by overlap fraction.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/combined_RTPDv4_scvi_celltypist.h5ad')
test = anndata.AnnData(adata.raw.X, var=adata.raw.var, obs=adata.obs)

# Gene universe: genes detected in at least 10% of the myeloid cells.
adata = test[test.obs.query(" broad_celltype == 'Myeloid' ").index].copy()
sc.pp.filter_genes(adata, min_cells=0.1*(len(adata)))
print(adata.shape)

tmp = pd.DataFrame(index= adata.var_names, columns=['mouse_name','human_name'])
tmp['mouse_name'] = adata.var_names

orth = pd.read_csv('/Users/gouink/Documents/GeneLists/Ensembl_Human_Mouse_Ortholog.txt',index_col=None,header=0)

# Build a mouse -> human lookup once instead of querying the ortholog table
# for every gene. As before, the FIRST listed human ortholog is used when a
# mouse gene has several, and genes with no ortholog end up NaN.
first_human = (orth.drop_duplicates(subset='Mousegenename', keep='first')
                   .set_index('Mousegenename')['Genename'])
tmp['human_name'] = tmp['mouse_name'].map(first_human)

tmp.dropna(how='any',inplace=True)

all_mouse_genes = tmp['mouse_name'].tolist()
all_human_genes = tmp['human_name'].tolist()
all_mouse_set = set(all_mouse_genes)
all_human_set = set(all_human_genes)

mouse_dgex = pd.read_csv('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/myeloid_majority_voting_dgex.csv', index_col=0, header=0)
human_dgex = pd.read_csv('/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/analysis/myeloid_filtered/leiden_dgex.csv', index_col=0, header=0)

olap_df = pd.DataFrame(index= [f'mouse_{x}' for x in sorted(mouse_dgex['group'].unique().tolist())],
                       columns= [f'human_{x}' for x in sorted(human_dgex['group'].unique().tolist())])

# Marker definition shared by both species; `@g` is the group being queried.
marker_query = " group == @g & pvals_adj < 0.005 & logfoldchanges >= 1.0 & pct_nz_group >= 0.3 "

# The human marker sets do not depend on the mouse group, so compute them once.
human_marker_sets = {}
for j in olap_df.columns:
    # Strip only the 'human_' prefix; human groups are integer cluster ids.
    g = int(j.split('_', 1)[1])
    human_marker_sets[j] = set(human_dgex.query(marker_query)['names'].tolist()) & all_human_set

for i in olap_df.index:

    # Strip only the 'mouse_' prefix: splitting on every underscore would
    # truncate group names that themselves contain '_'.
    g = i.split('_', 1)[1]
    mouse_genes = mouse_dgex.query(marker_query)['names'].tolist()
    mouse_genes = list(set(mouse_genes) & all_mouse_set)
    mouse_ortholog_genes = [tmp.loc[x,'human_name'] for x in mouse_genes]
    num_mouse = len(mouse_ortholog_genes)
    mouse_ortholog_set = set(mouse_ortholog_genes)

    for j in olap_df.columns:

        human_genes = human_marker_sets[j]
        num_human = len(human_genes)

        overlap = len(mouse_ortholog_set & human_genes)
        # P(X >= overlap) when drawing num_human genes from the universe
        pval = scipy.stats.hypergeom.sf(overlap-1, len(all_mouse_genes), num_mouse, num_human)

        olap_df.loc[i,j] = [num_mouse, num_human, overlap, pval]
        # print(f"{i}<->{j}: Mouse: {num_mouse}, Human: {num_human}, Overlap: {overlap}, pval: {pval}")

olap_df

# Colour = fraction of mouse markers shared; size / annotation = -log10(p).
olap_size = olap_df.applymap(lambda x: x[2]/x[0] if x[0]!=0 else 0)
pval = olap_df.applymap(lambda x: -1*np.log10(x[3]))
# Extra 'reference' column passed along with the data; presumably used by
# gf.heatmap2 to draw the size legend — confirm against that helper.
pval['reference'] = np.arange(len(pval))
olap_size['reference'] = np.ones(len(pval))

g = gf.heatmap2(data= olap_size,
                vmin= 0,
                vmax= olap_size.max().max(),
                cellsize= pval,
                cmap= 'viridis',
                annot= pval,
                annot_kws= {'fontsize': 8, 'weight': 'normal'},
                ref_sizes= [10, 20 ,30],
                ref_labels= ['1e-2','1e-4','1e-6'],
                figsize= (8,8),
                fontsize= 12)

g.figure.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_myeloid_celltypist_geneintersection.pdf', bbox_inches='tight', transparent= True)

In [None]:
# T-cell UMAP panels (Leiden clusters and 'majority_voting' labels), with the
# legend placed in the right margin.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/tcell_reclustered.h5ad')

# Keyword arguments shared by both panels.
umap_common = dict(
    frameon=False,
    legend_loc='right margin',
    legend_fontsize=8,
    ncols=1,
    show=False,
)

fig, axs = plt.subplots(figsize=(2, 2))
sc.pl.umap(adata, color=["leiden"], ax=axs, **umap_common)
fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_tcell_umap_leiden.pdf', bbox_inches='tight')

fig, axs = plt.subplots(figsize=(2, 2))
sc.pl.umap(adata, color=["majority_voting"], ax=axs, **umap_common)
fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_tcell_umap_celltypist.pdf', bbox_inches='tight')

In [None]:
# Final figure: Leiden-cluster percentages across all T cells, by treatment.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/tcell_reclustered.h5ad')
tmp = adata.copy()

groupby = 'leiden'

# sampleID -> treatment/response label
tx_map = {
    'B1': 'IgG', 'B2': 'PD1_nonresp', 'B3': 'PD1_resp',
    'B4': 'RTIgG', 'B5': 'RTPD1_nonresp', 'B6': 'RTPD1_resp',
    'C4': 'RTIgG', 'C5': 'RTPD1_nonresp',
    'mA1': 'IgG', 'mB1': 'PD1_nonresp', 'mC1': 'PD1_resp',
    'mD1': 'RTIgG', 'mE1': 'RTPD1_nonresp', 'mF1': 'RTPD1_resp',
}

tmp.obs['treatment'] = tmp.obs['treatment'].astype(str)
tmp.obs['treatment_response'] = tmp.obs['sampleID'].apply(lambda sample: tx_map[sample])

counts = pf.pct_df_faster(tmp, groupby=groupby, rep='sampleID', xcat='treatment')
counts

order = sorted(adata.obs[groupby].unique().tolist())
horder = ['IgG', 'PD1', 'RTIgG', 'RTPD1']

fig, axs = plt.subplots(figsize=(10, 3))

box_kwargs = dict(
    data=counts,
    x=groupby,
    y='percent',
    hue='treatment',
    order=order,
    hue_order=horder,
)
_ = sns.boxplot(**box_kwargs)
_ = axs.set_title("Normalized to total T-cells")
fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_tcell_leiden_boxplot.pdf', bbox_inches='tight')

In [None]:
# Final figure: Leiden-cluster percentages within CD8+ T cells, by treatment.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/tcell_reclustered.h5ad')
cd8_cells = adata.obs.query(" majority_voting == 'CD8+ T cell' ").index
tmp = adata[cd8_cells, :].copy()

groupby = 'leiden'

# sampleID -> treatment/response label
tx_map = {
    'B1': 'IgG', 'B2': 'PD1_nonresp', 'B3': 'PD1_resp',
    'B4': 'RTIgG', 'B5': 'RTPD1_nonresp', 'B6': 'RTPD1_resp',
    'C4': 'RTIgG', 'C5': 'RTPD1_nonresp',
    'mA1': 'IgG', 'mB1': 'PD1_nonresp', 'mC1': 'PD1_resp',
    'mD1': 'RTIgG', 'mE1': 'RTPD1_nonresp', 'mF1': 'RTPD1_resp',
}

tmp.obs['treatment'] = tmp.obs['treatment'].astype(str)
tmp.obs['treatment_response'] = tmp.obs['sampleID'].apply(lambda sample: tx_map[sample])

counts = pf.pct_df_faster(tmp, groupby=groupby, rep='sampleID', xcat='treatment')
counts

# The x-axis order is taken from the full object, not from the CD8 subset.
order = sorted(adata.obs[groupby].unique().tolist())
horder = ['IgG', 'PD1', 'RTIgG', 'RTPD1']

fig, axs = plt.subplots(figsize=(10, 3))

box_kwargs = dict(
    data=counts,
    x=groupby,
    y='percent',
    hue='treatment',
    order=order,
    hue_order=horder,
)
_ = sns.boxplot(**box_kwargs)

_ = axs.set_title("Normalized to total CD8T")
fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_CD8T_leiden_boxplot.pdf', bbox_inches='tight')

In [None]:
# Final figure: percentages of the two CD4 'majority_voting' labels among all
# T cells, as a boxplot with the individual samples overlaid.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/tcell_reclustered.h5ad')

tmp = adata.copy()

groupby = 'majority_voting'

# sampleID -> treatment/response label
tx_map = {
    'B1': 'IgG', 'B2': 'PD1_nonresp', 'B3': 'PD1_resp',
    'B4': 'RTIgG', 'B5': 'RTPD1_nonresp', 'B6': 'RTPD1_resp',
    'C4': 'RTIgG', 'C5': 'RTPD1_nonresp',
    'mA1': 'IgG', 'mB1': 'PD1_nonresp', 'mC1': 'PD1_resp',
    'mD1': 'RTIgG', 'mE1': 'RTPD1_nonresp', 'mF1': 'RTPD1_resp',
}

tmp.obs['treatment'] = tmp.obs['treatment'].astype(str)
tmp.obs['treatment_response'] = tmp.obs['sampleID'].apply(lambda sample: tx_map[sample])

counts = pf.pct_df_faster(tmp, groupby=groupby, rep='sampleID', xcat='treatment')
counts

# The full sorted label list is computed and then immediately replaced by the
# two CD4 labels that are actually plotted.
order = sorted(adata.obs[groupby].unique().tolist())
order = ['Resting CD4+ T cells', 'Activated CD4+ T cell']
horder = ['IgG', 'PD1', 'RTIgG', 'RTPD1']

fig, axs = plt.subplots(figsize=(2, 3))

# Arguments common to the box and strip layers.
layer_kwargs = dict(
    data=counts.reset_index(),
    x=groupby,
    y='percent',
    hue='treatment',
    order=order,
    hue_order=horder,
)
_ = sns.boxplot(fliersize=0, linewidth=0.5, **layer_kwargs)

_ = sns.stripplot(s=3, dodge=True, legend=None, **layer_kwargs)

_ = axs.set_xticklabels(order, rotation=90)
_ = axs.set_ylim(0, 70)
_ = axs.get_legend().remove()
_ = axs.set_title("Normalized to total T-cells")
fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_tcell_celltypist_boxplot.pdf', bbox_inches='tight')

In [None]:
# CD8 T-cell signature scores, summarised as the per-sample median and plotted
# per treatment (one panel per signature).
#
# The data-preparation steps below used to be commented out, which left the
# plotting code dependent on `sigs` and `df` already existing in the kernel
# (NameError, or a stale `df`, on a clean "Restart & Run All"). They are
# restored so the cell is self-contained.
adata_orig = sc.read_h5ad("/Users/gouink/Documents/RTPD1Manuscript/mouse-scSeq/mouse/RTPD_mouse_scSeq.h5ad")
sc.pp.normalize_per_cell(adata_orig, counts_per_cell_after=10000)
sc.pp.log1p(adata_orig)

sigs = ['CD8Activation', 'G1', 'G2', 'Cytolytics', 'TerminalDiff']

# Score every signature on the full, freshly normalised object.
for sig in sigs:
    GOI = pd.read_csv(f'/Users/gouink/Documents/GeneLists/Mouse_{sig}.csv', index_col=0, header=0)
    GOI = GOI.iloc[:, 1].tolist()  # gene names are read from the second data column
    sc.tl.score_genes(adata_orig,
                      gene_list= GOI,
                      score_name= sig,
                      use_raw= False)

# Restrict to CD8+ T cells and copy the scores across by cell barcode.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/tcell_reclustered.h5ad')
adata = adata[adata.obs.query(" majority_voting == 'CD8+ T cell' ").index, :].copy()
adata.obs[sigs] = adata_orig.obs.loc[adata.obs.index, sigs]

# sampleID -> treatment (responder / non-responder samples pooled)
tx_map = {
            'B1': 'IgG',
            'B2': 'PD1',
            'B3': 'PD1',
            'B4': 'RTIgG',
            'B5': 'RTPD1',
            'B6': 'RTPD1',
            'C4': 'RTIgG',
            'C5': 'RTPD1',
            'mA1': 'IgG',
            'mB1': 'PD1',
            'mC1': 'PD1',
            'mD1': 'RTIgG',
            'mE1': 'RTPD1',
            'mF1': 'RTPD1'
        }

# One row per sample: median of every numeric obs column, plus its treatment.
df = adata.obs.groupby(by=['sampleID']).median(numeric_only=True)
df['treatment'] = [tx_map[x] for x in df.index]

fig,axs = plt.subplots(nrows= 2,
                       ncols= 3,
                       sharex= True,
                       sharey= False,
                       gridspec_kw={'hspace':0.3, 'wspace':0.5},
                       figsize=(6,3))

# zip() stops at the shorter input, so with five signatures the sixth panel
# of the 2x3 grid stays empty.
for sig, ax in zip(sigs, axs.flat):

    _= sns.boxplot(data= df,
                    x= 'treatment',
                    y= sig,
                    fliersize= 0,
                    order= ['IgG', 'PD1', 'RTIgG', 'RTPD1'],
                    linewidth= 0.5,
                    ax= ax)

    _= sns.stripplot(data= df,
                    x= 'treatment',
                    y= sig,
                    s= 3,
                    order= ['IgG', 'PD1', 'RTIgG', 'RTPD1'],
                    ax= ax)

    _= ax.set_xticklabels(['IgG', 'PD1', 'RTIgG', 'RTPD1'], fontsize=8, rotation=45)
    _= ax.set_title(f"{sig}", fontsize=8)
    _= ax.set_xlabel('')
    _= ax.set_ylabel('')
    # _= ax.set_ylim((0, df[sig].max()))

_= fig.suptitle('CD8T Signatures')
fig.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_CD8T_signatures.pdf', bbox_inches='tight')

In [None]:
# Score the gene signatures on the full data set, copy the scores onto the
# CD8+ T cells and draw per-treatment violin plots at single-cell level.
adata_orig = sc.read_h5ad(f"/Users/gouink/Documents/RTPD1Manuscript/mouse-scSeq/mouse/RTPD_mouse_scSeq.h5ad")
sc.pp.normalize_per_cell(adata_orig, counts_per_cell_after=10000)
sc.pp.log1p(adata_orig)

sigs = ['CD8Activation', 'G1', 'G2', 'Cytolytics', 'TerminalDiff']

for sig in sigs:
    gene_table = pd.read_csv(
        f'/Users/gouink/Documents/GeneLists/Mouse_{sig}.csv',
        index_col=0,
        header=0
    )
    GOI = gene_table.iloc[:, 1].tolist()  # gene names come from the second data column
    sc.tl.score_genes(adata_orig, gene_list=GOI, score_name=sig, use_raw=False)

adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/tcell_reclustered.h5ad')
cd8_cells = adata.obs.query(" majority_voting == 'CD8+ T cell' ").index
adata = adata[cd8_cells, :].copy()
# Scores are matched to the CD8 cells through their shared obs index.
adata.obs[sigs] = adata_orig.obs.loc[adata.obs.index, sigs]

# Optional down-sampling to at most 100 cells per sample (disabled):
# cellBC = []
# for s in adata.obs['sampleID'].unique():
#     tmp = adata.obs.query(" sampleID == @s ").copy()
#     if len(tmp) <= 100:
#         cellBC += tmp.index.tolist()
#     else:
#         cellBC += tmp.sample(100).index.tolist()

# adata = adata[cellBC]
# adata.obs['sampleID'].value_counts()

# sampleID -> treatment (responder / non-responder samples pooled)
tx_map = {
    'B1': 'IgG', 'B2': 'PD1', 'B3': 'PD1',
    'B4': 'RTIgG', 'B5': 'RTPD1', 'B6': 'RTPD1',
    'C4': 'RTIgG', 'C5': 'RTPD1',
    'mA1': 'IgG', 'mB1': 'PD1', 'mC1': 'PD1',
    'mD1': 'RTIgG', 'mE1': 'RTPD1', 'mF1': 'RTPD1',
}

adata.obs['treatment'] = adata.obs['sampleID'].apply(lambda sample: tx_map[sample])

sc.pl.violin(adata, groupby='treatment', keys=sigs, stripplot=False, show=False)
plt.savefig('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/figures/RTPD_CD8T_signatures_violinplot.pdf', bbox_inches='tight')

In [None]:
# Per-cell 'Cytolytics' values for the IgG and PD1 treatment groups, read from
# whatever `adata` is currently in the kernel (needs 'treatment' and
# 'Cytolytics' columns in `adata.obs`).
x1 = adata.obs.query(" treatment == 'IgG' ")['Cytolytics'].to_numpy()
x2 = adata.obs.query(" treatment == 'PD1' ")['Cytolytics'].to_numpy()

# Wilcoxon rank-sum test between the two groups.
scipy.stats.ranksums(x1, x2)

In [None]:
# Inspection cell: number of values in each group used by the rank-sum test
# (relies on `x1` / `x2` left in the kernel by a previous cell).
len(x1)
len(x2)

In [None]:
# Per-cell-type chi-square comparison of T-cell counts between IgG and RTIgG.
adata = sc.read_h5ad('/Users/gouink/Documents/RTPD1Manuscript/RTPD_v4/tcell_reclustered.h5ad')

# sampleID -> treatment (responder / non-responder samples pooled)
tx_map = {
    'B1': 'IgG', 'B2': 'PD1', 'B3': 'PD1',
    'B4': 'RTIgG', 'B5': 'RTPD1', 'B6': 'RTPD1',
    'C4': 'RTIgG', 'C5': 'RTPD1',
    'mA1': 'IgG', 'mB1': 'PD1', 'mC1': 'PD1',
    'mD1': 'RTIgG', 'mE1': 'RTPD1', 'mF1': 'RTPD1',
}

adata.obs['treatment'] = adata.obs['sampleID'].apply(lambda sample: tx_map[sample])

# Number of cells per (treatment, cell type).
counts = adata.obs.groupby(by=['treatment', 'majority_voting']).count()['sampleID']

obs_IgG = counts['IgG', :]
obs_RTIgG = counts['RTIgG', :]

total_per_cluster = obs_IgG + obs_RTIgG
total = total_per_cluster.sum()

# Expected counts if each cell type made up the same fraction in both treatments.
pooled_fraction = total_per_cluster / total
f_exp_IgG = pooled_fraction * obs_IgG.sum()
f_exp_RTIgG = pooled_fraction * obs_RTIgG.sum()

# One row per cell type, one column per treatment; the test runs along each row.
observed = np.array([obs_IgG, obs_RTIgG]).T
expected = np.array([f_exp_IgG, f_exp_RTIgG]).T
result = scipy.stats.chisquare(observed, expected, axis=1)

result = pd.DataFrame(result.pvalue, index=obs_IgG.index)
result

In [None]:
# Inspection cell: display `f_obs`.
# NOTE(review): `f_obs` is not assigned anywhere in the visible part of this
# notebook — confirm an earlier cell defines it, otherwise this raises NameError.
f_obs

In [None]:
# Inspection cell: display the per-cell-type totals left in the kernel by the
# most recently run chi-square cell.
total_per_cluster