## Annotation of cells from spleen samples

In [None]:
import numpy as np
import seaborn as sns

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.legend import Legend
import matplotlib.colors as colors
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import pandas as pd
import scipy
import scanpy as sc
import anndata as ad
    
from sklearn import datasets
from sklearn.decomposition import PCA

from numba import jit

import celltypist
from celltypist import models

from matplotlib.cm import ScalarMappable

In [None]:
#Custom colormap

from matplotlib.cm import register_cmap
from matplotlib.colors import ListedColormap

# Build the custom categorical palette 'op_tab25': 9 colours sampled from the
# upper part of tab20b followed by 16 colours sampled from tab20c.
tab20b = matplotlib.colormaps['tab20b']
tab20c = matplotlib.colormaps['tab20c']
# NOTE(review): the two highest sample points of tab20b appear to resolve to
# the same (last) colour, so the palette may contain one duplicate - confirm.
colors1 = tab20b(np.linspace(3.001/5., 1, 9))
colors2 = tab20c(np.linspace(0, 3.999/5., 16))

# Deliberately not called `colors`: that name is the alias of the
# `matplotlib.colors` module imported at the top of the notebook.
op_colors = np.concatenate([colors1, colors2])

map_name = 'op_tab25'
op_cmap = ListedColormap(op_colors, name=map_name)
# force=True lets this cell be re-run: without it, registering an already
# registered name raises a ValueError.
matplotlib.colormaps.register(name=map_name, cmap=op_cmap, force=True)

In [None]:
sc.set_figure_params(scanpy=True, dpi=600, dpi_save=600, frameon=True, vector_friendly=False, fontsize=14,
                         figsize=(9,8),  format='pdf', facecolor=None, transparent=False, ipython_format='png2x')

In [None]:
#Load Spleen data
adata = sc.read("maranou_032024_spleen_integrated.h5ad")
tissue = 'spleen'

In [None]:
# Build '<sample>_<current index>' names for every cell and store them in a
# temporary obs column.
sample_prefix = adata.obs['sample'].astype(str)
adata.obs['unique_cell_name'] = sample_prefix + '_' + adata.obs.index.astype(str)

# Promote the temporary column to the obs index ...
adata.obs.index = adata.obs['unique_cell_name']

# ... and remove the column again, since the index now carries the same values.
adata.obs.drop(columns='unique_cell_name', inplace=True)

In [None]:
# Fine resolution clustering
sc.tl.leiden(adata, resolution=1.,key_added='leiden_initial')

In [None]:
sc.pl.umap(adata, color=['leiden_initial'], title=[str(tissue) +' Leiden'], palette='op_tab25', legend_loc='on data')


In [None]:
##Refine clustering of 16 and 23
# 2. Identify the cluster(s) containing DC2, monocytes, and macrophages
target_clusters = ['16','23']  # Replace with your actual cluster IDs

# 3. Subset the data
adata_subset = adata[adata.obs['leiden_initial'].isin(target_clusters)].copy()

# 4. Recompute the neighborhood graph on the subset
sc.pp.neighbors(adata_subset)

# 5. Perform Leiden clustering at higher resolution on the subset
sc.tl.leiden(adata_subset, resolution=0.3, key_added='leiden_refined')

# 6. Prepare categories for the combined clustering
initial_categories = list(adata.obs['leiden_initial'].cat.categories)
refined_categories = list(adata_subset.obs['leiden_refined'].cat.categories)

# Remove target clusters from initial categories
initial_categories_filtered = [cat for cat in initial_categories if cat not in target_clusters]

# Create new category names for refined clusters
refined_categories_renamed = [f'r{cat}' for cat in refined_categories]

# Combine filtered initial categories with renamed refined categories
combined_categories = initial_categories_filtered + refined_categories_renamed

# 7. Create new column for combined clustering
adata.obs['leiden_r0'] = pd.Categorical(
    adata.obs['leiden_initial'],
    categories=combined_categories
)

# 8. Update the combined clustering for the refined subset
for idx in adata_subset.obs.index:
    refined_value = adata_subset.obs.loc[idx, 'leiden_refined']
    adata.obs.at[idx, 'leiden_r0'] = f'r{refined_value}'

# 9. Optionally, sort the categories for better readability
adata.obs['leiden_r0'] = adata.obs['leiden_r0'].cat.reorder_categories(sorted(adata.obs['leiden_r0'].cat.categories))

In [None]:
sc.pl.umap(adata, color=['leiden_r0'], title=[str(tissue) +' Leiden'], palette='op_tab25', legend_loc='on data')


In [None]:
##Refine clustering of DC8low DC1 (cDC1 vs monocyte-derived DCs)

target_clusters = ['25']  # Replace with your actual cluster IDs

# 3. Subset the data
adata_subset = adata[adata.obs['leiden_r0'].isin(target_clusters)].copy()

# 4. Recompute the neighborhood graph on the subset
sc.pp.neighbors(adata_subset)

# 5. Perform Leiden clustering at higher resolution on the subset
sc.tl.leiden(adata_subset, resolution=0.3, key_added='leiden_refined')

# 6. Prepare categories for the combined clustering
initial_categories = list(adata.obs['leiden_r0'].cat.categories)
refined_categories = list(adata_subset.obs['leiden_refined'].cat.categories)

# Remove target clusters from initial categories
initial_categories_filtered = [cat for cat in initial_categories if cat not in target_clusters]

# Create new category names for refined clusters
refined_categories_renamed = [f'dc1r{cat}' for cat in refined_categories]

# Combine filtered initial categories with renamed refined categories
combined_categories = initial_categories_filtered + refined_categories_renamed

# 7. Create new column for combined clustering
adata.obs['leiden_r1'] = pd.Categorical(
    adata.obs['leiden_r0'],
    categories=combined_categories
)

# 8. Update the combined clustering for the refined subset
for idx in adata_subset.obs.index:
    refined_value = adata_subset.obs.loc[idx, 'leiden_refined']
    adata.obs.at[idx, 'leiden_r1'] = f'dc1r{refined_value}'

In [None]:
sc.pl.umap(adata, color=['leiden_r1'], title=[str(tissue) +' Leiden'], palette='op_tab25', legend_loc='on data')


In [None]:
##Refine clustering of DC2 cluster (the 'peninsula' is migratory DCs)

target_clusters = ['13']  # Replace with your actual cluster IDs

# 3. Subset the data
adata_subset = adata[adata.obs['leiden_r1'].isin(target_clusters)].copy()

# 4. Recompute the neighborhood graph on the subset
sc.pp.neighbors(adata_subset)

# 5. Perform Leiden clustering at higher resolution on the subset
sc.tl.leiden(adata_subset, resolution=0.1, key_added='leiden_refined')

# 6. Prepare categories for the combined clustering
initial_categories = list(adata.obs['leiden_r1'].cat.categories)
refined_categories = list(adata_subset.obs['leiden_refined'].cat.categories)

# Remove target clusters from initial categories
initial_categories_filtered = [cat for cat in initial_categories if cat not in target_clusters]

# Create new category names for refined clusters
refined_categories_renamed = [f'dc2r{cat}' for cat in refined_categories]

# Combine filtered initial categories with renamed refined categories
combined_categories = initial_categories_filtered + refined_categories_renamed

# 7. Create new column for combined clustering
adata.obs['leiden_r2'] = pd.Categorical(
    adata.obs['leiden_r1'],
    categories=combined_categories
)

# 8. Update the combined clustering for the refined subset
for idx in adata_subset.obs.index:
    refined_value = adata_subset.obs.loc[idx, 'leiden_refined']
    adata.obs.at[idx, 'leiden_r2'] = f'dc2r{refined_value}'

In [None]:
# Set leiden equal to refined clustering
adata.obs['leiden'] = adata.obs['leiden_r2']

In [None]:
sc.pl.umap(adata, color=['leiden'], title=[str(tissue) +' Leiden'], palette='op_tab25', legend_loc='on data')


In [None]:
# dc_clusters = ['r0','r1','r2','20','dc1r0','dc1r1','dc1r2','dc2r0','dc2r1','dc2r2','22','26']
# adata_dc = adata[adata.obs['cell_type'].isin(dc_clusters)]

### Celltypist for initial guesses of cell types

In [None]:
import celltypist
from celltypist import models

# Work on a copy so the mouse gene symbols of `adata` stay untouched
adata_human = adata.copy()

#Mouse-human gene conversion (MGI homology report)
mouse_human_genes = pd.read_csv("http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt",sep="\t", index_col=False)
mouse = mouse_human_genes[mouse_human_genes['Common Organism Name']=='mouse, laboratory']
human = mouse_human_genes[mouse_human_genes['Common Organism Name']=='human']

# Keep only the homology class key and the gene symbol for each species
mouse = mouse[['DB Class Key', 'Symbol']].reset_index(drop=True)
human = human[['DB Class Key', 'Symbol']].reset_index(drop=True)

# Convert from mouse to human symbols through the shared 'DB Class Key'.
# The first table row wins for a duplicated mouse symbol or a duplicated
# class key; a gene without a human counterpart keeps its mouse symbol.
orthologs = mouse.drop_duplicates('Symbol').merge(
    human.drop_duplicates('DB Class Key'),
    on='DB Class Key',
    suffixes=('_mouse', '_human'),
)
mouse_to_human = dict(zip(orthologs['Symbol_mouse'], orthologs['Symbol_human']))
gene_list = np.array(
    [mouse_to_human.get(symbol, symbol) for symbol in adata_human.var_names]
)

# Log-normalise to 10,000 counts per cell before running CellTypist.
# NOTE(review): this assumes adata.X still holds raw counts - confirm.
sc.pp.normalize_total(adata_human, target_sum=1e4)
sc.pp.log1p(adata_human)

adata_human.var_names = gene_list
# Several mouse genes can map onto the same human symbol
adata_human.var_names_make_unique()

predictions = celltypist.annotate(adata_human, model = 'Immune_All_Low.pkl', majority_voting = True)
print(predictions.predicted_labels)
adata_human = predictions.to_adata()

sc.pl.umap(adata_human, color = 'majority_voting', palette='tab20',  legend_loc = 'on data', legend_fontsize=12, legend_fontweight='medium')


## DEGs in clusters

In [None]:
# Wilcoxon rank-sum test for every 'leiden' cluster
sc.tl.rank_genes_groups(adata, groupby='leiden', method='wilcoxon')

# Collect the results of all groups, then keep only genes with
# adjusted p < 0.03 and log fold change > 1
all_markers = sc.get.rank_genes_groups_df(adata, group=None)
is_significant = all_markers['pvals_adj'] < 0.03
is_upregulated = all_markers['logfoldchanges'] > 1.0
de_markers = all_markers[is_significant & is_upregulated]


In [None]:
de_markers

In [None]:
sc.set_figure_params(scanpy=True, dpi=300, dpi_save=300, frameon=True, vector_friendly=False, fontsize=14,
                         figsize=(9,8),  format='pdf', facecolor=None, transparent=False, ipython_format='png2x')

In [None]:
sc.pl.rank_genes_groups(adata, n_genes=20, sharey=False)


In [None]:
# Top 20 filtered marker genes of one cluster, highest test score first
cluster = '20'
de_markers[de_markers['group'] == cluster].sort_values('scores', ascending=False).head(20)


In [None]:
# Export the top 20 DEGs of a cluster as a comma-separated list, e.g. for
# pasting into ACT (Annotation of Cell Types)
cluster = '22'
cluster_markers = de_markers[de_markers['group'] == cluster]
deg_list = cluster_markers.sort_values('scores', ascending=False)['names'].head(20).tolist()
print(*deg_list, sep =',')


In [None]:
# More detailed comparison between subsets, here '20' vs 'dc1r0','dc1r1','dc1r2'
# NOTE(review): with reference='rest' each listed group is tested against the
# remaining cells, not only against the other listed groups (see the scanpy
# rank_genes_groups docs) - confirm this is the intended comparison.
sc.tl.rank_genes_groups(adata, 'leiden', groups = ['20','dc1r0','dc1r1','dc1r2'], reference='rest', method='wilcoxon')
sc.pl.rank_genes_groups(adata, n_genes=20, sharey=False)
# Overwrites the filtered `de_markers` table from above with the unfiltered
# results of this comparison.
de_markers = sc.get.rank_genes_groups_df(adata, None)


In [None]:
# Two square panels side by side: naive (left) and pathogenic (right) cells
ncols = 2
nrows = 1
figsize = 8
wspace = 0.1
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, fig_height))
plt.subplots_adjust(wspace=wspace)

# Styling shared by both panels
leiden_panel_style = dict(
    color=['leiden'],
    add_outline=True,
    outline_width=(0.2,0.5),
    palette='op_tab25',
    alpha=0.7,
    s=10,
    legend_loc='on data',
    legend_fontsize=12,
    legend_fontweight='medium',
)

naive_cells = adata[adata.obs['pathogenicity']=='naive']
pathogenic_cells = adata[adata.obs['pathogenicity']=='pathogenic']
sc.pl.umap(naive_cells, ax=axs[0], show=False, title=['Spleen, Naive'], **leiden_panel_style)
sc.pl.umap(pathogenic_cells, ax=axs[1], show=True, title=['Spleen, Pathogenic'], **leiden_panel_style)

# Saving is currently disabled:
# plt.tight_layout()
# plt.savefig('umap_spleen_annotations_samples.pdf',dpi=600)
plt.show()

## Final annotations

In [None]:
# Back to 600 dpi for the final annotation figures
sc.set_figure_params(
    scanpy=True,
    dpi=600,
    dpi_save=600,
    frameon=True,
    vector_friendly=False,
    fontsize=14,
    figsize=(9,8),
    format='pdf',
    facecolor=None,
    transparent=False,
    ipython_format='png2x',
)

In [None]:
# Low-resolution cell type of every final Leiden cluster.
# Based on DEA and marker genes from PanglaoDB, Tabula Muris and Cell Marker 2.0
# (currently dysfunctional), Annotation of Cell Types: ACT
annotation_dict_low_res = {
    # --- clusters kept from the initial Leiden clustering ---
    "0": "Naive B cells",
    "1": "Naive B cells",
    "2": "MZB and B-1 cells",
    "3": "Naive B cells",
    "4": "Activated B cells",
    "5": "Immature B cells",
    "6": "NK cells",
    "7": "CD8+",
    "8": "Treg",
    "9": "Naive B cells",
    "10": "CD4+",
    "11": "Monocytes and macrophages",  # Classical monocytes here
    "12": "MZB and B-1 cells",
    "14": "Tcm",
    "15": "Mature follicular B cells",
    "17": "Th",
    "18": "Monocytes and macrophages",  # Nonclassical monocytes
    "19": "Proliferative B cells",  # Activated follicular B cells?
    "20": "Lymphoid-resident cDC1",
    "21": "Heterogenous T cells",
    "22": "Neutrophils",  # Activated neutrophils or myeloid-derived suppressor cells (MDSCs)
    "24": "Mast cells",
    "26": "pDC",
    "27": "Activated T cells",  # With B cell characteristics Cd79a, Pax5, and Ighd
    "28": "Plasma cells",

    # --- sub-clusters of the refined DC2 cluster ---
    "dc2r0": "cDC2",
    "dc2r1": "cDC2",
    "dc2r2": "cDC2",

    # --- sub-clusters of the refined DC1 cluster ---
    # The two DC1 clusters are also more clearly separated by CyC than by Cd8a
    # (which has varied expression in CyC(hi)).
    # Ref: The protease inhibitor cystatin C is differentially expressed among
    # dendritic cell populations, but does not control antigen presentation.
    # El-Sukkari et al. J Immunol. 2003 Nov 15;171(10):5003-11.
    # doi: 10.4049/jimmunol.171.10.5003.
    "dc1r0": "CCR2(int) cDC1",
    "dc1r1": "cDC1",
    "dc1r2": "cDC1",

    # --- sub-clusters of the refined clusters 16 and 23 ---
    "r0": "Undefined DC",
    "r1": "Macrophages",
    "r2": "DC2",
    "r3": "Germinal center B cells",
    "r4": "Plasma cells",
    "r5": "Treg",
}

import warnings

# Colour of every low-resolution cell type label (tab20 entries or named colours).
# NOTE(review): "Activated B cells" and "Mature follicular B cells" share 'navy'.
to_rgb = plt.matplotlib.colors.to_rgb
ann_colors = plt.colormaps['tab20'].colors
ann_palette_low_res = {
    "Germinal center B cells": ann_colors[18],
    "Naive B cells": ann_colors[0],
    "CD8+": ann_colors[4],
    "Activated B cells": to_rgb('navy'),
    "Immature B cells": to_rgb('dodgerblue'),
    "Activated T cells": ann_colors[2],
    "Tcm": to_rgb('lightseagreen'),
    "CD4+": to_rgb('greenyellow'),
    "Treg": ann_colors[16],
    "Th": to_rgb('lime'),
    "Macrophages": ann_colors[3],
    "Heterogenous T cells": ann_colors[15],
    "NK cells": ann_colors[17],
    "Th cells": to_rgb('mediumseagreen'),
    "MZB and B-1 cells": ann_colors[1],
    "Mature follicular B cells": to_rgb('navy'),
    "Undefined DC": ann_colors[5],
    "DC1": ann_colors[12],
    # Label produced by annotation_dict_low_res for cluster 'dc1r0'; it had
    # no palette entry. It reuses the colour of the unused "DC1" entry.
    "CCR2(int) cDC1": ann_colors[12],
    "pDC": ann_colors[6],
    "Plasma cells": ann_colors[19],
    "Monocytes and macrophages": ann_colors[7],  # Probably Classical monocytes
    "Lymphoid-resident cDC1": to_rgb('palevioletred'),
    "cDC1": to_rgb('mediumvioletred'),
    "DC2": to_rgb('darkmagenta'),
    "cDC2": to_rgb('darkorchid'),
    "Proliferative": to_rgb('b'),
    "Mast cells": to_rgb('cornflowerblue'),
    "Neutrophils": to_rgb('coral'),
    "Proliferative B cells": to_rgb('cyan'),
}

# Fail early, with a readable message, if an annotation label has no colour
labels_without_color = sorted(set(annotation_dict_low_res.values()) - set(ann_palette_low_res))
if labels_without_color:
    raise KeyError(f"ann_palette_low_res has no colour for: {labels_without_color}")

adata.obs['cell_type_low_res'] = adata.obs.leiden.map(annotation_dict_low_res)

# Clusters that are missing from the annotation dict end up as NaN
clusters_without_label = sorted(
    set(adata.obs['leiden'].dropna().astype(str)) - set(annotation_dict_low_res)
)
if clusters_without_label:
    warnings.warn(f"Clusters without a low-resolution annotation: {clusters_without_label}")

In [None]:
# UMAP coloured by the low-resolution annotation; `save` asks scanpy to
# write the figure to a file in addition to showing it
sc.pl.umap(
    adata,
    color=['cell_type_low_res'],
    title='Spleen',
    palette=ann_palette_low_res,
    legend_loc='on data',
    legend_fontsize=8,
    legend_fontweight='heavy',
    add_outline=True,
    outline_width=(0.2,0.8),
    save='_spleen_annotations_low_res.pdf',
)


In [None]:
# High-resolution cell type of every final Leiden cluster.
# Based on DEA and marker genes from PanglaoDB, Tabula Muris and Cell Marker 2.0
# (currently dysfunctional), Annotation of Cell Types: ACT
annotation_dict_high_res = {
    # --- clusters kept from the initial Leiden clustering ---
    "0": "Naive B cells",
    "1": "Naive B cells",
    "2": "MZB and B-1 cells",
    "3": "Naive B cells",
    "4": "Activated B cells",
    "5": "Immature B cells",
    "6": "NK cells",
    "7": "CD8+",
    "8": "Treg",
    "9": "Naive B cells",
    "10": "CD4+",
    "11": "Monocytes and macrophages",  # Classical monocytes here
    "12": "MZB and B-1 cells",
    "14": "Tcm",
    "15": "Mature follicular B cells",
    "17": "Th",
    "18": "Monocytes and macrophages",  # Nonclassical monocytes
    "19": "Proliferative B cells",  # Activated follicular B cells?
    "20": "Lymphoid-resident cDC1",
    "21": "Heterogenous T cells",
    "22": "Neutrophils",  # Activated neutrophils or myeloid-derived suppressor cells (MDSCs)
    "24": "Mast cells",
    "26": "pDC",
    "27": "Activated T cells",  # With B cell characteristics Cd79a, Pax5, and Ighd
    "28": "Plasma cells",

    # --- sub-clusters of the refined DC2 cluster (formerly "13": "DC2") ---
    "dc2r0": "WDFY4+ cDC2",
    "dc2r1": "Relb(low) cDC2",
    "dc2r2": "Migratory cDC2",

    # --- sub-clusters of the refined DC1 cluster ---
    # The two DC1 clusters are also more clearly separated by CyC than by Cd8a
    # (which has varied expression in CyC(hi)).
    # Ref: The protease inhibitor cystatin C is differentially expressed among
    # dendritic cell populations, but does not control antigen presentation.
    # El-Sukkari et al. J Immunol. 2003 Nov 15;171(10):5003-11.
    # doi: 10.4049/jimmunol.171.10.5003.
    "dc1r0": "CD8- CCR2(int) cDC1",
    "dc1r1": "CD8(low) cDC1",
    "dc1r2": "CD8- CCR2(low) cDC1",

    # --- sub-clusters of the refined clusters 16 and 23 ---
    "r0": "Undefined DC",
    "r1": "Red pulp macrophages",
    "r2": "Relb(int.) cDC2",
    "r3": "Germinal center B cells",
    "r4": "Plasma cells",
    "r5": "Treg",
}

import warnings

# Colour of every high-resolution cell type label (tab20 entries or named colours).
# NOTE(review): "Activated B cells" and "CD8(low) cDC1" share ann_colors[13].
to_rgb = plt.matplotlib.colors.to_rgb
ann_colors = plt.colormaps['tab20'].colors
ann_palette_high_res = {
    "Germinal center B cells": ann_colors[18],
    "Naive B cells": ann_colors[0],
    "CD8+": ann_colors[4],
    "Activated B cells": ann_colors[13],
    "Immature B cells": to_rgb('dodgerblue'),
    "Activated T cells": ann_colors[2],
    "Tcm": to_rgb('lightseagreen'),
    "CD4+": to_rgb('greenyellow'),
    "Treg": ann_colors[16],
    "Th": to_rgb('lime'),
    "Red pulp macrophages": ann_colors[3],
    "Heterogenous T cells": ann_colors[15],
    "NK cells": ann_colors[17],
    "Th cells": to_rgb('mediumseagreen'),
    "MZB and B-1 cells": ann_colors[1],
    "Mature follicular B cells": to_rgb('navy'),
    "Undefined DC": ann_colors[5],
    "CCR7+ DC1": ann_colors[12],
    "pDC": ann_colors[6],
    "Plasma cells": ann_colors[19],
    "Monocytes and macrophages": ann_colors[7],  # Probably Classical monocytes
    "Lymphoid-resident cDC1": to_rgb('palevioletred'),  # XCR1+ [Gurka et al]
    "CD8- CCR2+ cDC1": to_rgb('mediumvioletred'),
    "CD8(low) cDC1": ann_colors[13],
    "CD8- CCR2- cDC1": to_rgb('deeppink'),
    # Labels actually produced by annotation_dict_high_res for 'dc1r0' and
    # 'dc1r2'; they had no palette entry. They reuse the colours of the
    # "CCR2+" / "CCR2-" entries, which are kept for backward compatibility.
    "CD8- CCR2(int) cDC1": to_rgb('mediumvioletred'),
    "CD8- CCR2(low) cDC1": to_rgb('deeppink'),
    "Relb(int.) cDC2": to_rgb('darkmagenta'),
    "WDFY4+ cDC2": to_rgb('darkorchid'),
    "Relb(low) cDC2": to_rgb('indianred'),
    "Migratory cDC2": to_rgb('darkred'),
    "Proliferative": to_rgb('b'),
    "Mast cells": to_rgb('cornflowerblue'),
    "Neutrophils": to_rgb('coral'),
    "Proliferative B cells": to_rgb('cyan'),
}

# Fail early, with a readable message, if an annotation label has no colour
labels_without_color = sorted(set(annotation_dict_high_res.values()) - set(ann_palette_high_res))
if labels_without_color:
    raise KeyError(f"ann_palette_high_res has no colour for: {labels_without_color}")

adata.obs['cell_type_high_res'] = adata.obs.leiden.map(annotation_dict_high_res)

# Clusters that are missing from the annotation dict end up as NaN
clusters_without_label = sorted(
    set(adata.obs['leiden'].dropna().astype(str)) - set(annotation_dict_high_res)
)
if clusters_without_label:
    warnings.warn(f"Clusters without a high-resolution annotation: {clusters_without_label}")

In [None]:
# UMAP coloured by the high-resolution annotation; `save` asks scanpy to
# write the figure to a file in addition to showing it
sc.pl.umap(
    adata,
    color=['cell_type_high_res'],
    title='Spleen',
    palette=ann_palette_high_res,
    legend_loc='on data',
    legend_fontsize=8,
    legend_fontweight='heavy',
    add_outline=True,
    outline_width=(0.2,0.8),
    save='_spleen_annotations_high_res.pdf',
)


### UMAPS of Naive/Pathogenic WT/KO samples

In [None]:
file_path = '/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/Fig5/spleen_annotations_naive_wt.pdf'
with plt.rc_context():  # Use this to set figure params like size and dpi
    sc.pl.umap(adata[adata.obs['sample']=='wt_naive'], color=['cell_type_low_res'],add_outline=True, outline_width = (0.2,0.3), palette=ann_palette_low_res, alpha=0.7, s=12, title='Spleen node, Naive WT',legend_loc='on data',legend_fontsize=8, legend_fontweight='heavy', show=False)
    plt.savefig(file_path, bbox_inches="tight")

In [None]:
file_path = '/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/Fig5/spleen_annotations_naive_ko.pdf'
with plt.rc_context():  # Use this to set figure params like size and dpi
    sc.pl.umap(adata[adata.obs['sample']=='ko_naive'], color=['cell_type_low_res'],add_outline=True, outline_width = (0.2,0.3), palette=ann_palette_low_res, alpha=0.7, s=12, title='Spleen node, Naive KO',legend_loc='on data',legend_fontsize=8, legend_fontweight='heavy', show=False)
    plt.savefig(file_path, bbox_inches="tight")

In [None]:
file_path = '/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/Fig5/spleen_annotations_pathogenic_wt.pdf'
with plt.rc_context():  # Use this to set figure params like size and dpi
    sc.pl.umap(adata[adata.obs['sample']=='wt_pathogenic'], color=['cell_type_low_res'],add_outline=True, outline_width = (0.2,0.3), palette=ann_palette_low_res, alpha=0.7, s=12, title='Spleen node, Pathogenic WT',legend_loc='on data',legend_fontsize=8, legend_fontweight='heavy', show=False)
    plt.savefig(file_path, bbox_inches="tight")

In [None]:
file_path = '/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/Fig5/spleen_annotations_pathogenic_ko.pdf'
with plt.rc_context():  # Use this to set figure params like size and dpi
    sc.pl.umap(adata[adata.obs['sample']=='ko_pathogenic'], color=['cell_type_low_res'],add_outline=True, outline_width = (0.2,0.3), palette=ann_palette_low_res, alpha=0.7, s=12, title='Spleen node, Pathogenic KO',legend_loc='on data',legend_fontsize=8, legend_fontweight='heavy', show=False)
    plt.savefig(file_path, bbox_inches="tight")

In [None]:
## WT vs KO side-by-side
#### Define matplotlib Axes
#### Number of Axes & plot size
ncols = 2
nrows = 1
figsize = 8
wspace = 0.1
fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * figsize + figsize * wspace * (ncols - 1), nrows * figsize),
)
plt.subplots_adjust(wspace=wspace)

# Styling shared by both panels
panel_style = dict(
    show=False,
    color=['cell_type_low_res'],
    add_outline=True,
    outline_width=(0.2,0.8),
    palette=ann_palette_low_res,
    alpha=0.7,
    s=10,
    legend_loc='on data',
    legend_fontsize=12,
    legend_fontweight='medium',
)

# Left: wild type, right: Cd74 knock-out
sc.pl.umap(adata[adata.obs['WT/KO']=='wt'], ax=axs[0], title=['Spleen, WT'], **panel_style)
sc.pl.umap(adata[adata.obs['WT/KO']=='ko'], ax=axs[1], title=['Spleen, Cd74 KO'], **panel_style)

plt.tight_layout()
plt.savefig('umap_spleen_annotations_wt_ko.pdf',dpi=600)
plt.show()

In [None]:
### All four samples in a 2x2 grid
#### Define matplotlib Axes
#### Number of Axes & plot size
ncols = 2
nrows = 2
figsize = 8
wspace = 0.1
fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * figsize + figsize * wspace * (ncols - 1), nrows * figsize),
)
plt.subplots_adjust(wspace=wspace)

# Styling shared by all panels
panel_style = dict(
    show=False,
    color=['cell_type_low_res'],
    add_outline=True,
    outline_width=(0.2,0.8),
    palette=ann_palette_low_res,
    alpha=0.7,
    s=10,
    legend_loc='on data',
    legend_fontsize=12,
    legend_fontweight='medium',
)

# (sample, title, axes): top row wild type, bottom row Cd74 knock-out;
# left column naive, right column pathogenic
panels = [
    ('wt_naive', 'Spleen, Naive WT', axs[0,0]),
    ('wt_pathogenic', 'Spleen, Pathogenic WT', axs[0,1]),
    ('ko_naive', 'Spleen, Naive Cd74 KO', axs[1,0]),
    ('ko_pathogenic', 'Spleen, Pathogenic Cd74 KO', axs[1,1]),
]
for sample_name, panel_title, panel_ax in panels:
    sc.pl.umap(
        adata[adata.obs['sample']==sample_name],
        ax=panel_ax,
        title=[panel_title],
        **panel_style,
    )

plt.tight_layout()
plt.savefig('umap_spleen_annotations_samples.pdf',dpi=600)
plt.show()

In [None]:
# Write the annotated object to disk
annotated_file = "maranou_032024_spleen_annotated.h5ad"
adata.write_h5ad(annotated_file)

## DCs/Monocytes/Macrophages

In [None]:
#Load annotated spleen data
adata = sc.read("maranou_032024_spleen_annotated.h5ad")

In [None]:
DC_subset = ['Undefined DC','Relb(low) cDC2','WDFY4+ cDC2','Lymphoid-resident cDC1','pDC','CD8- CCR2+ cDC1','CD8- CCR2- cDC1','Relb(int.) cDC2','Migratory cDC2',
 'CD8(low) cDC1']
dc_mono_macro = DC_subset+['Monocytes and macrophages']+['Neutrophils']
adata_dc_mono_macro = adata[adata.obs['cell_type_high_res'].isin(dc_mono_macro)].copy()

In [None]:
# High-resolution UMAP of the DC / monocyte / macrophage subset, saved as PDF
file_path = '/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/Fig5/Spleen_DCMonoMacro_annotations.pdf'
with plt.rc_context():  # Use this to set figure params like size and dpi
    sc.pl.umap(
        adata_dc_mono_macro,
        color=['cell_type_high_res'],
        title='DCs/Macrophages/Monocytes',
        palette=ann_palette_high_res,
        add_outline=True,
        outline_width=(0.2,0.3),
        alpha=0.7,
        s=30,
        legend_loc='on data',
        legend_fontsize=12,
        legend_fontweight='heavy',
        show=False,
    )
    # Restrict the visible y-range of the embedding
    plt.ylim(-8.25,5.5)

    plt.savefig(file_path, bbox_inches="tight")

## Cell proportions in samples

In [None]:
#Load annotated data
adata = sc.read("maranou_032024_spleen_annotated.h5ad")
tissue = 'spleen'

In [None]:
sns.set_style("ticks")

In [None]:
def get_cell_proportions(adata, key='cell_type_low_res'):
    """Return the fraction of cells carrying each label of an ``obs`` column.

    Parameters
    ----------
    adata
        Object exposing a pandas ``obs`` table (e.g. an AnnData object or view).
    key
        Name of the ``obs`` column whose labels are tallied. Defaults to
        ``'cell_type_low_res'``, the column this function used to hard-code.

    Returns
    -------
    pandas.Series
        Proportion per label, indexed by label, in ``value_counts`` order.
    """
    cell_counts = adata.obs[key].value_counts()
    total_cells = cell_counts.sum()
    return cell_counts / total_cells

# One AnnData view per sample
adata1 = adata[adata.obs['sample']=='wt_naive']
adata2 = adata[adata.obs['sample']=='ko_naive']
adata3 = adata[adata.obs['sample']=='wt_pathogenic']
adata4 = adata[adata.obs['sample']=='ko_pathogenic']

cell_proportions1 = get_cell_proportions(adata1)
cell_proportions2 = get_cell_proportions(adata2)
cell_proportions3 = get_cell_proportions(adata3)
cell_proportions4 = get_cell_proportions(adata4)

# Union of the cell types present in any of the four samples
all_cell_types = sorted(set(cell_proportions1.index) | set(cell_proportions2.index) |
                        set(cell_proportions3.index) | set(cell_proportions4.index))
# One column per sample; a cell type absent from a sample gets proportion 0
df = pd.DataFrame({
    'WT Naive': cell_proportions1.reindex(all_cell_types).fillna(0),
    'KO Naive': cell_proportions2.reindex(all_cell_types).fillna(0),
    'WT Pathogenic': cell_proportions3.reindex(all_cell_types).fillna(0),
    'KO Pathogenic': cell_proportions4.reindex(all_cell_types).fillna(0)
})

# Order cell types by their proportion in the first column ('WT Naive');
# the remaining columns only break ties
df = df.sort_values(by=df.columns.tolist(), ascending=False)

# Create the plot
fig, ax = plt.subplots(figsize=(15, 8))

x = [0.1, 0.9, 1.9, 2.7]  # x-coordinates for the four bars
width = 0.7  # width of the bars

# Lower edge of each segment = sum of the segments stacked beneath it in the same bar
bottoms = df.cumsum() - df

for cell_type in df.index:
    # One call draws this cell type's segment in all four bars, so labelling the
    # call gives exactly one legend entry per cell type if the legend is enabled
    ax.bar(x, df.loc[cell_type], bottom=bottoms.loc[cell_type], width=width, alpha=0.6,
           color=ann_palette_low_res[cell_type], label=cell_type)

# Customize the plot
ax.set_ylabel('Proportion of Cells', fontsize=14)
# ax.set_title('Cell Type Proportions Comparison', fontsize=18)
ax.set_xticks(x)
ax.set_xticklabels(['WT', 'Cd74 KO','WT', 'Cd74 KO'], fontsize=12, rotation=0, ha='center')

# Write the cell type and percentage in the middle of every segment that is tall enough
min_label_fraction = 0.015  # only label if proportion > 1.5%
for bar_x, dataset in zip(x, df.columns):
    for cell_type in df.index:
        fraction = df.loc[cell_type, dataset]
        if fraction > min_label_fraction:
            ax.text(bar_x, bottoms.loc[cell_type, dataset] + fraction/2,
                    f'{cell_type} {fraction:.1%}',
                    ha='center', va='center', fontsize=12)

# Add a legend
# plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0., fontsize=10)

# Remove top and right spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

ax.set_xlim(-0.4, 3.1)  # final x-axis limits
# Group headers above the bar pairs, positioned in axes coordinates
ax.text(0.25, 1.02, 'Naive', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')
ax.text(0.77, 1.02, 'Pathogenic', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')

# Save the figure
plt.savefig('spleen_cell_type_proportions_naive_vs_pathogenic.pdf', dpi=600, bbox_inches='tight')

plt.show()



In [None]:
list(adata.obs['cell_type_high_res'].unique())

In [None]:
# High res subsets: annotation labels grouped by lineage.
# "Treg" was listed twice in T_cell_subset; each label now appears once.
B_cell_subset = ["Naive B cells","MZB and B-1 cells","Activated B cells","Immature B cells","Mature follicular B cells","Proliferative B cells","Germinal center B cells"]
T_cell_subset = ["CD8+","Treg","CD4+","Tcm","Th","Heterogenous T cells","Activated T cells"]
DC_subset_hr = ["Lymphoid-resident cDC1",'Undefined DC',"CD8(low) cDC1","CD8- CCR2+ cDC1","CD8- CCR2- cDC1", "Migratory cDC2", "Relb(int.) cDC2","WDFY4+ cDC2","Relb(low) cDC2","pDC"]


In [None]:
list(adata.obs['cell_type_low_res'].unique())

In [None]:
# Low res subsets: annotation labels grouped by lineage.
# "Treg" was listed twice in T_cell_subset; each label now appears once.
B_cell_subset = ["Naive B cells","MZB and B-1 cells","Activated B cells","Immature B cells","Mature follicular B cells","Proliferative B cells","Germinal center B cells"]
T_cell_subset = ["CD8+","Treg","CD4+","Tcm","Th","Heterogenous T cells","Activated T cells"]
DC_subset_lr = ["Lymphoid-resident cDC1",'Undefined DC',"cDC1", "cDC2","DC2","pDC"]


In [None]:
## Proportions of B cells

# Keep only cells whose low-resolution label is in B_cell_subset, so the
# proportions below are relative to that subset rather than to all cells
adata_subset = adata[adata.obs['cell_type_low_res'].isin(B_cell_subset)].copy()

# One AnnData view per sample
adata1 = adata_subset[adata_subset.obs['sample']=='wt_naive']
adata2 = adata_subset[adata_subset.obs['sample']=='ko_naive']
adata3 = adata_subset[adata_subset.obs['sample']=='wt_pathogenic']
adata4 = adata_subset[adata_subset.obs['sample']=='ko_pathogenic']

cell_proportions1 = get_cell_proportions(adata1)
cell_proportions2 = get_cell_proportions(adata2)
cell_proportions3 = get_cell_proportions(adata3)
cell_proportions4 = get_cell_proportions(adata4)

# Union of the cell types present in any of the four samples
all_cell_types = sorted(set(cell_proportions1.index) | set(cell_proportions2.index) |
                        set(cell_proportions3.index) | set(cell_proportions4.index))
# One column per sample; a cell type absent from a sample gets proportion 0
df = pd.DataFrame({
    'WT Naive': cell_proportions1.reindex(all_cell_types).fillna(0),
    'KO Naive': cell_proportions2.reindex(all_cell_types).fillna(0),
    'WT Pathogenic': cell_proportions3.reindex(all_cell_types).fillna(0),
    'KO Pathogenic': cell_proportions4.reindex(all_cell_types).fillna(0)
})

# Order cell types by their proportion in the first column ('WT Naive');
# the remaining columns only break ties
df = df.sort_values(by=df.columns.tolist(), ascending=False)

# Create the plot
fig, ax = plt.subplots(figsize=(15, 8))

x = [0.1, 0.9, 1.9, 2.7]  # x-coordinates for the four bars
width = 0.7  # width of the bars

# Lower edge of each segment = sum of the segments stacked beneath it in the same bar
bottoms = df.cumsum() - df

for cell_type in df.index:
    # One call draws this cell type's segment in all four bars, so labelling the
    # call gives exactly one legend entry per cell type if the legend is enabled
    ax.bar(x, df.loc[cell_type], bottom=bottoms.loc[cell_type], width=width, alpha=0.5,
           color=ann_palette_low_res[cell_type], label=cell_type)

# Customize the plot
ax.set_ylabel('Proportion of B Cells', fontsize=14)
# ax.set_title('Cell Type Proportions Comparison', fontsize=18)
ax.set_xticks(x)
ax.set_xticklabels(['WT', 'Cd74 KO','WT', 'Cd74 KO'], fontsize=12, rotation=0, ha='center')

# Write the cell type and percentage in the middle of every segment that is tall enough
min_label_fraction = 0.015  # only label if proportion > 1.5%
for bar_x, dataset in zip(x, df.columns):
    for cell_type in df.index:
        fraction = df.loc[cell_type, dataset]
        if fraction > min_label_fraction:
            ax.text(bar_x, bottoms.loc[cell_type, dataset] + fraction/2,
                    f'{cell_type} {fraction:.1%}',
                    ha='center', va='center', fontsize=12)

# Add a legend
# plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0., fontsize=10)

# Remove top and right spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

ax.set_xlim(-0.4, 3.1)  # final x-axis limits
# Group headers above the bar pairs, positioned in axes coordinates
ax.text(0.25, 1.02, 'Naive', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')
ax.text(0.77, 1.02, 'Pathogenic', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')

# Save the figure
plt.savefig('spleen_B_cell_type_proportions_naive_vs_pathogenic.pdf', dpi=600, bbox_inches='tight')

plt.show()





In [None]:
## Proportions of T cells

# Keep only cells whose low-resolution label is in T_cell_subset, so the
# proportions below are relative to that subset rather than to all cells
adata_subset = adata[adata.obs['cell_type_low_res'].isin(T_cell_subset)].copy()

# One AnnData view per sample
adata1 = adata_subset[adata_subset.obs['sample']=='wt_naive']
adata2 = adata_subset[adata_subset.obs['sample']=='ko_naive']
adata3 = adata_subset[adata_subset.obs['sample']=='wt_pathogenic']
adata4 = adata_subset[adata_subset.obs['sample']=='ko_pathogenic']

cell_proportions1 = get_cell_proportions(adata1)
cell_proportions2 = get_cell_proportions(adata2)
cell_proportions3 = get_cell_proportions(adata3)
cell_proportions4 = get_cell_proportions(adata4)

# Union of the cell types present in any of the four samples
all_cell_types = sorted(set(cell_proportions1.index) | set(cell_proportions2.index) |
                        set(cell_proportions3.index) | set(cell_proportions4.index))
# One column per sample; a cell type absent from a sample gets proportion 0
df = pd.DataFrame({
    'WT Naive': cell_proportions1.reindex(all_cell_types).fillna(0),
    'KO Naive': cell_proportions2.reindex(all_cell_types).fillna(0),
    'WT Pathogenic': cell_proportions3.reindex(all_cell_types).fillna(0),
    'KO Pathogenic': cell_proportions4.reindex(all_cell_types).fillna(0)
})

# Order cell types by their proportion in the first column ('WT Naive');
# the remaining columns only break ties
df = df.sort_values(by=df.columns.tolist(), ascending=False)

# Create the plot
fig, ax = plt.subplots(figsize=(15, 8))

x = [0.1, 0.9, 1.9, 2.7]  # x-coordinates for the four bars
width = 0.7  # width of the bars

# Lower edge of each segment = sum of the segments stacked beneath it in the same bar
bottoms = df.cumsum() - df

for cell_type in df.index:
    # One call draws this cell type's segment in all four bars, so labelling the
    # call gives exactly one legend entry per cell type if the legend is enabled
    ax.bar(x, df.loc[cell_type], bottom=bottoms.loc[cell_type], width=width, alpha=0.5,
           color=ann_palette_low_res[cell_type], label=cell_type)

# Customize the plot
ax.set_ylabel('Proportion of T Cells', fontsize=14)
# ax.set_title('Cell Type Proportions Comparison', fontsize=18)
ax.set_xticks(x)
ax.set_xticklabels(['WT', 'Cd74 KO','WT', 'Cd74 KO'], fontsize=12, rotation=0, ha='center')

# Write the cell type and percentage in the middle of every segment that is tall enough
min_label_fraction = 0.015  # only label if proportion > 1.5%
for bar_x, dataset in zip(x, df.columns):
    for cell_type in df.index:
        fraction = df.loc[cell_type, dataset]
        if fraction > min_label_fraction:
            ax.text(bar_x, bottoms.loc[cell_type, dataset] + fraction/2,
                    f'{cell_type} {fraction:.1%}',
                    ha='center', va='center', fontsize=12)

# Add a legend
# plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0., fontsize=10)

# Remove top and right spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

ax.set_xlim(-0.4, 3.1)  # final x-axis limits
# Group headers above the bar pairs, positioned in axes coordinates
ax.text(0.25, 1.02, 'Naive', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')
ax.text(0.77, 1.02, 'Pathogenic', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')

# Save the figure
plt.savefig('spleen_T_cell_type_proportions_naive_vs_pathogenic.pdf', dpi=600, bbox_inches='tight')

plt.show()





In [None]:
## Proportions of DCs

# Keep only cells whose low-resolution label is in DC_subset_lr, so the
# proportions below are relative to that subset rather than to all cells
adata_subset = adata[adata.obs['cell_type_low_res'].isin(DC_subset_lr)].copy()

# One AnnData view per sample
adata1 = adata_subset[adata_subset.obs['sample']=='wt_naive']
adata2 = adata_subset[adata_subset.obs['sample']=='ko_naive']
adata3 = adata_subset[adata_subset.obs['sample']=='wt_pathogenic']
adata4 = adata_subset[adata_subset.obs['sample']=='ko_pathogenic']

cell_proportions1 = get_cell_proportions(adata1)
cell_proportions2 = get_cell_proportions(adata2)
cell_proportions3 = get_cell_proportions(adata3)
cell_proportions4 = get_cell_proportions(adata4)

# Union of the cell types present in any of the four samples
all_cell_types = sorted(set(cell_proportions1.index) | set(cell_proportions2.index) |
                        set(cell_proportions3.index) | set(cell_proportions4.index))
# One column per sample; a cell type absent from a sample gets proportion 0
df = pd.DataFrame({
    'WT Naive': cell_proportions1.reindex(all_cell_types).fillna(0),
    'KO Naive': cell_proportions2.reindex(all_cell_types).fillna(0),
    'WT Pathogenic': cell_proportions3.reindex(all_cell_types).fillna(0),
    'KO Pathogenic': cell_proportions4.reindex(all_cell_types).fillna(0)
})

# Order cell types by their proportion in the first column ('WT Naive');
# the remaining columns only break ties
df = df.sort_values(by=df.columns.tolist(), ascending=False)

# Create the plot
fig, ax = plt.subplots(figsize=(15, 8))

x = [0.1, 0.9, 1.9, 2.7]  # x-coordinates for the four bars
width = 0.7  # width of the bars

# Lower edge of each segment = sum of the segments stacked beneath it in the same bar
bottoms = df.cumsum() - df

for cell_type in df.index:
    # One call draws this cell type's segment in all four bars, so labelling the
    # call gives exactly one legend entry per cell type if the legend is enabled
    ax.bar(x, df.loc[cell_type], bottom=bottoms.loc[cell_type], width=width, alpha=0.5,
           color=ann_palette_low_res[cell_type], label=cell_type)

# Customize the plot
ax.set_ylabel('Proportion of DCs', fontsize=14)
# ax.set_title('Cell Type Proportions Comparison', fontsize=18)
ax.set_xticks(x)
ax.set_xticklabels(['WT', 'Cd74 KO','WT', 'Cd74 KO'], fontsize=12, rotation=0, ha='center')

# Label every segment. Segments thinner than 1.5% get a fixed offset above their
# lower edge instead of being centred, and their cell type is printed for inspection.
for bar_x, dataset in zip(x, df.columns):
    for cell_type in df.index:
        fraction = df.loc[cell_type, dataset]
        segment_base = bottoms.loc[cell_type, dataset]
        if fraction < 0.015:
            print(cell_type)
            label_y = segment_base + (0.02 if fraction < 0.01 else 0.015)
        else:
            label_y = segment_base + fraction/2
        ax.text(bar_x, label_y,
                f'{cell_type} {fraction:.1%}',
                ha='center', va='center', fontsize=12)

# Add a legend
# plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0., fontsize=10)

# Remove top and right spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

ax.set_xlim(-0.4, 3.1)  # final x-axis limits
# Group headers above the bar pairs, positioned in axes coordinates
ax.text(0.25, 1.02, 'Naive', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')
ax.text(0.77, 1.02, 'Pathogenic', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')

# Save the figure
plt.savefig('spleen_DC_type_proportions_naive_vs_pathogenic.pdf', dpi=600, bbox_inches='tight')

plt.show()





In [None]:
## WT vs Cd74 KO

def get_cell_proportions2(adata, key='cell_type_low_res'):
    """Return the fraction of cells carrying each label of an ``obs`` column.

    Parameters
    ----------
    adata
        Object exposing a pandas ``obs`` table (e.g. an AnnData object or view).
    key
        Name of the ``obs`` column whose labels are tallied. Defaults to
        ``'cell_type_low_res'``, the column this function used to hard-code.

    Returns
    -------
    pandas.Series
        Proportion per label, indexed by label, in ``value_counts`` order.
    """
    cell_counts = adata.obs[key].value_counts()
    total_cells = cell_counts.sum()
    return cell_counts / total_cells

# One AnnData view per sample, ordered so that the WT pair comes before the KO pair
adata1 = adata[adata.obs['sample']=='wt_naive']
adata2 = adata[adata.obs['sample']=='wt_pathogenic']
adata3 = adata[adata.obs['sample']=='ko_naive']
adata4 = adata[adata.obs['sample']=='ko_pathogenic']

cell_proportions1 = get_cell_proportions2(adata1)
cell_proportions2 = get_cell_proportions2(adata2)
cell_proportions3 = get_cell_proportions2(adata3)
cell_proportions4 = get_cell_proportions2(adata4)

# Union of the cell types present in any of the four samples
all_cell_types = sorted(set(cell_proportions1.index) | set(cell_proportions2.index) |
                        set(cell_proportions3.index) | set(cell_proportions4.index))
# One column per sample; a cell type absent from a sample gets proportion 0
df = pd.DataFrame({
    'WT Naive': cell_proportions1.reindex(all_cell_types).fillna(0),
    'WT Pathogenic': cell_proportions2.reindex(all_cell_types).fillna(0),
    'KO Naive': cell_proportions3.reindex(all_cell_types).fillna(0),
    'KO Pathogenic': cell_proportions4.reindex(all_cell_types).fillna(0)
})

# Order cell types by their proportion in the first column ('WT Naive');
# the remaining columns only break ties
df = df.sort_values(by=df.columns.tolist(), ascending=False)

# Create the plot
fig, ax = plt.subplots(figsize=(15, 8))

x = [0.1, 0.9, 1.9, 2.7]  # x-coordinates for the four bars
width = 0.7  # width of the bars

# Lower edge of each segment = sum of the segments stacked beneath it in the same bar
bottoms = df.cumsum() - df

for cell_type in df.index:
    # One call draws this cell type's segment in all four bars, so labelling the
    # call gives exactly one legend entry per cell type if the legend is enabled
    ax.bar(x, df.loc[cell_type], bottom=bottoms.loc[cell_type], width=width, alpha=0.7,
           color=ann_palette_low_res[cell_type], label=cell_type)

# Customize the plot
ax.set_ylabel('Proportion of Cells', fontsize=14)
# ax.set_title('Cell Type Proportions Comparison', fontsize=18)
ax.set_xticks(x)
ax.set_xticklabels(['Naive', 'Pathogenic','Naive', 'Pathogenic'], fontsize=12, rotation=0, ha='center')

# Write the cell type and percentage in the middle of every segment that is tall enough
min_label_fraction = 0.015  # only label if proportion > 1.5%
for bar_x, dataset in zip(x, df.columns):
    for cell_type in df.index:
        fraction = df.loc[cell_type, dataset]
        if fraction > min_label_fraction:
            ax.text(bar_x, bottoms.loc[cell_type, dataset] + fraction/2,
                    f'{cell_type} {fraction:.1%}',
                    ha='center', va='center', fontsize=12)

# Add a legend
# plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0., fontsize=10)

# Remove top and right spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Group headers above the bar pairs, positioned in axes coordinates
ax.text(0.25, 1.02, 'WT', transform=ax.transAxes, ha='center', va='bottom', fontsize=14, fontweight='bold')
ax.text(0.77, 1.02, 'Cd74 KO', transform=ax.transAxes, ha='center', va='bottom', fontsize=14, fontweight='bold')
ax.set_xlim(-0.4, 3.1)  # final x-axis limits

plt.savefig('spleen_cell_type_proportions_wt_vs_ko.png', dpi=600, bbox_inches='tight')

plt.show()



## Markers used in annotation

### DC

In [None]:
# Marker gene lists used for the DC dotplots, grouped by population
common_cdc_markers = ['Flt3', 'Zbtb46', 'Kit']
cdc1_markers = [
    'Irf8', 'Batf3', 'Nfil3', 'Id2', 'Bcl6', 'Xcr1',
    'Rab43', 'Itgax', 'Itgae', 'Cd24a', 'Cd8a',
]
cdc2_markers = ['Relb', 'Esam', 'Itgam', 'Irf4', 'Sirpa']
additional_cdc_markers = ['Clec9a', 'Ccr2', 'Ccr7']
dc_activation_markers = ['Cd40', 'Cd80', 'Cd86']
pDC_markers = [
    'Ptprc', 'Bst2', 'Siglech', 'Tcf4', 'Zeb2',
    'Irf7', 'Pacsin1', 'Spib', 'Bcl11b',
]

# Flatten all groups into one alphabetically sorted NumPy array
marker_groups = (
    common_cdc_markers,
    cdc1_markers,
    cdc2_markers,
    additional_cdc_markers,
    dc_activation_markers,
    pDC_markers,
)
all_dc_markers = np.sort([gene for group in marker_groups for gene in group])

In [None]:
all_dc_markers

In [None]:
# Independent copy of the cells annotated with a low-resolution DC label
is_dc_lr = adata.obs['cell_type_low_res'].isin(DC_subset_lr)
adata_dc_lr = adata[is_dc_lr].copy()

# Dendrogram over the annotation groups, then a dotplot of all DC markers per group
sc.tl.dendrogram(adata_dc_lr, groupby='cell_type_low_res')
sc.pl.dotplot(
    adata_dc_lr,
    var_names=all_dc_markers,
    groupby='cell_type_low_res',
    save='dc_markers_low_res_annotations.pdf',
    cmap='Blues',
)


In [None]:
# Independent copy of the cells annotated with a high-resolution DC label
is_dc_hr = adata.obs['cell_type_high_res'].isin(DC_subset_hr)
adata_dc_hr = adata[is_dc_hr].copy()

# Dendrogram over the annotation groups, then a dotplot of all DC markers per group
sc.tl.dendrogram(adata_dc_hr, groupby='cell_type_high_res')
sc.pl.dotplot(
    adata_dc_hr,
    var_names=all_dc_markers,
    groupby='cell_type_high_res',
    save='dc_markers_high_res_annotations.pdf',
    cmap='Blues',
)


In [None]:
# DC subset by low-resolution label, with a 'cell_type' column restricted to the
# categories still in use after subsetting
dc_mask = adata.obs['cell_type_low_res'].isin(DC_subset_lr)
adata_dc = adata[dc_mask].copy()
dc_labels = pd.Categorical(adata_dc.obs['cell_type_low_res'])
adata_dc.obs['cell_type'] = dc_labels.remove_unused_categories()

# Cells from the pathogenic condition only (a view of adata_dc)
adata_dc_pat = adata_dc[adata_dc.obs['pathogenicity']=='pathogenic']

# One UMAP panel per marker, for the first 12 entries of the sorted marker array
sc.pl.umap(
    adata_dc_pat,
    color=all_dc_markers[0:12],
    cmap='coolwarm',
    add_outline=True,
    outline_width=(0.2, 0.5),
    alpha=1,
    s=24,
)

plt.show()

### Databases: pdb = PanglaoDB, ct = Cell Taxonomy, act = ACT:Annotation of Cell Types (formerly Cell Marker)

### T cell markers

In [None]:
# Two-gene sets: Cd3e paired with either co-receptor gene
cd8t_genes = ['Cd3e', 'Cd8a']  # Cd8+ T cells
cd4t_genes = ['Cd3e', 'Cd4']   # Cd4+ T cells

# Score every cell for each set; the result is stored in adata.obs[score_name]
sc.tl.score_genes(adata, cd8t_genes, score_name='cd8t_cell_score')
sc.tl.score_genes(adata, cd4t_genes, score_name='cd4t_cell_score')

# Both scores side by side, colour scale capped at 2
sc.pl.umap(
    adata,
    color=['cd8t_cell_score', 'cd4t_cell_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=2,
)

In [None]:
# More refined CD8+ sets (the '_act' suffix refers to the ACT database, see the heading above).
# The cytotoxic set is a reduced version of ['Cd3d','Cd3e','Cd3g','Cd8a','Ptprc'].
cd8t_cytotoxic_act = ['Cd3g', 'Cd8a', 'Ptprc']
sc.tl.score_genes(adata, cd8t_cytotoxic_act, score_name='cd8t_cytotoxic_act_score')

cd8t_memory_act = ['Arl4c', 'Gzmm', 'Klra6', 'Tnfsf8', 'Trdc']
sc.tl.score_genes(adata, cd8t_memory_act, score_name='cd8t_memory_act_score')

# Both scores side by side, colour scale capped at 1
sc.pl.umap(
    adata,
    color=['cd8t_cytotoxic_act_score', 'cd8t_memory_act_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=1.,
)

In [None]:
#PanglaoDB marker sets for regulatory and helper T cells
Treg_genes =['Ikzf2','Foxp3','Il2ra', 'Ctla4']
sc.tl.score_genes(adata, Treg_genes, score_name='treg_score_pdb')

Th_genes = ['Ccr4','Il13','Cd28','Cd3g','Ccr5']
# Th_genes = ['Cxcr6','Bcl6','Prdm1','Tbx21'] Ref?
sc.tl.score_genes(adata, Th_genes, score_name='th_score_pdb')

# Plot the scores computed above. They are stored under the '_pdb' names, so the
# colour keys must use those names (the previous 'treg_score'/'th_score' keys are
# not created by this cell).
sc.pl.umap(adata, color=['treg_score_pdb','th_score_pdb'],palette='tab20',cmap='coolwarm', vmax=1.5)

In [None]:
# Gene sets for naive thymus-derived CD4 T cells and CD25+ CD4 Tregs
naive_thymus_derived_cd4t_act = ['Igfbp4', 'Izumo1r', 'Ly6c1']
cd25_cd4_treg_act = ['Cd3d', 'Cd3e', 'Cd3g', 'Cd4', 'Il2ra', 'Nrp1', 'Ptprc']

# Score every cell for each set; the result is stored in adata.obs[score_name]
sc.tl.score_genes(adata, naive_thymus_derived_cd4t_act, score_name='naive_thymus_derived_cd4t_act_score')
sc.tl.score_genes(adata, cd25_cd4_treg_act, score_name='cd25_cd4_treg_act_score')

# Both scores side by side, colour scale capped at 2.5
sc.pl.umap(
    adata,
    color=['naive_thymus_derived_cd4t_act_score', 'cd25_cd4_treg_act_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=2.5,
)


In [None]:
# NOTE(review): this gene list is identical to naive_thymus_derived_cd4t_act in the
# preceding cell — confirm it is the intended follicular-helper set and not a copy-paste
cd4_follicular_helper_t_act = ['Igfbp4', 'Izumo1r', 'Ly6c1']
t_helper_2cell_act = ['Il1rl1']

# Score every cell for each set; the result is stored in adata.obs[score_name]
sc.tl.score_genes(adata, cd4_follicular_helper_t_act, score_name='cd4_follicular_helper_t_act_score')
sc.tl.score_genes(adata, t_helper_2cell_act, score_name='t_helper_2cell_act_score')

# Both scores side by side, colour scale capped at 1.5
sc.pl.umap(
    adata,
    color=['cd4_follicular_helper_t_act_score', 't_helper_2cell_act_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=1.5,
)


In [None]:
# Panel layout: one row, two square panels of side `figsize` separated by `wspace`
ncols = 2
nrows = 1
figsize = 8
wspace = 0.1
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, fig_height))
plt.subplots_adjust(wspace=wspace)

# Cd3e expression: naive cells on the left, pathogenic cells on the right.
# Only the first call passes show=False, so the figure is displayed by the second call.
is_naive = adata.obs['pathogenicity']=='naive'
is_pathogenic = adata.obs['pathogenicity']=='pathogenic'
sc.pl.umap(adata[is_naive], ax=axs[0], show=False, color=['Cd3e'], palette='tab20', cmap='coolwarm', vmax=3)
sc.pl.umap(adata[is_pathogenic], ax=axs[1], color=['Cd3e'], palette='tab20', cmap='coolwarm', vmax=3)


In [None]:
# Combined Cd8a + Ctla4 score: highlights CTLA4+ CD8+ T cells if present
ctla4_cd8_t = ['Cd8a', 'Ctla4']
sc.tl.score_genes(adata, ctla4_cd8_t, score_name='ctla4_cd8_t_score')

sc.pl.umap(
    adata,
    color=['ctla4_cd8_t_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=1.5,
)

In [None]:
# Per-cell totals for the three genes, each later scaled by its maximum over all cells
cd8a_total = adata[:,'Cd8a'].X.sum(1)
cd4_total = adata[:,'Cd4'].X.sum(1)
cd3e_total = adata[:,'Cd3e'].X.sum(1)

# np.asarray before multiplying: if the sums come back as np.matrix, `*` would
# otherwise be a matrix product instead of an element-wise one
cd8a_absent = np.asarray(1 - cd8a_total/np.max(cd8a_total))
cd4_absent = np.asarray(1 - cd4_total/np.max(cd4_total))
cd3e_present = np.asarray(cd3e_total/np.max(cd3e_total))
cd3e_absent = np.asarray(1 - cd3e_total/np.max(cd3e_total))

# No clear cluster of double negative T cells
# CD3E+ CD8A- CD4-
adata.obs['DNTscore'] = (cd8a_absent*cd4_absent)*cd3e_present

# Strangely, there seems to be concentration of Cd3- Cd8- Cd4- cells within the T cell cluster
# CD3E- CD8A- CD4-
adata.obs['TNTscore'] = (cd8a_absent*cd4_absent)*cd3e_absent

# Two square panels side by side: DNT score on the left, TNT score on the right
ncols = 2
nrows = 1
figsize = 8
wspace = 0.1
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, fig_height))
sc.pl.umap(adata, color=['DNTscore'], palette='tab20', cmap='coolwarm', ax=axs[0], show=False)
sc.pl.umap(adata, color=['TNTscore'], palette='tab20', cmap='coolwarm', ax=axs[1], vmax=1)

In [None]:
## Further test for T cell lineage of the 'triple negative T-cell like cells': They do not have TCR.
# 1. Gene sets (innate_t_genes is only defined here; it is used in the dotplot cell further down)
t_cell_genes = [
    'Tcf7', 'Gata3', 'Lef1', 'Runx3', 'Il7r',
    'Ptprc', 'Lck', 'Zap70', 'Lat',
]
tcr_genes = ['Trbc1', 'Trbc2', 'Cd3d', 'Cd3g']
innate_t_genes = ['Zbtb16']  # PLZF

# 2. Module scores, stored in adata.obs under the given score names
sc.tl.score_genes(adata, t_cell_genes, score_name='t_cell_score')
sc.tl.score_genes(adata, tcr_genes, score_name='tcr_score')

# 3. One UMAP row per group: key genes, lineage genes, then the two module scores
for panel in (
    ['Cd3e', 'Cd4', 'Cd8a'],
    ['Tcf7', 'Gata3', 'Il7r'],
    ['t_cell_score', 'tcr_score'],
):
    sc.pl.umap(adata, color=panel)


In [None]:
# 5. Examine TCR gene expression
# A cell is flagged when at least one of tcr_genes has a value > 0 in X
tcr_detected = (adata[:, tcr_genes].X > 0).sum(axis=1) > 0
adata.obs['has_tcr'] = tcr_detected.astype(str)
# Stored as a categorical column so that the UMAP colours it by category
adata.obs['has_tcr'] = adata.obs['has_tcr'].astype('category')
sc.pl.umap(adata, color='has_tcr')

In [None]:
# Th17 differentiation is enriched in aggregated DC (seen in GOA); score the
# associated genes to see where those cells sit on the UMAP
Th17_genes = ['Malt1', 'Nfkbiz', 'Nfkbid', 'Nlrp3']
sc.tl.score_genes(adata, Th17_genes, score_name='Th17_score')
sc.pl.umap(
    adata,
    color=['Th17_score'],
)

In [None]:
# 6. Compare with other cell types: dendrogram of the 'leiden' groups, then a
# dotplot of the T-cell, TCR and innate-T gene sets across those groups
lineage_markers = t_cell_genes + tcr_genes + innate_t_genes
sc.tl.dendrogram(adata, groupby='leiden')
sc.pl.dendrogram(adata, groupby='leiden')
sc.pl.dotplot(
    adata,
    var_names=lineage_markers,
    groupby='leiden',
)


In [None]:
# 7. Trajectory analysis (if applicable)
# Compute the PAGA graph over the 'leiden' groups, then draw it coloured by the same grouping
sc.tl.paga(adata, groups='leiden')
sc.pl.paga(adata, color='leiden')

In [None]:
# Sell is an alias for CD62L (naive/early memory T cell marker for central memory T cells)
Tcm_genes = ['Cd44', 'Sell', 'Il7r']
Trm_genes = ['Cd44', 'Sell', 'Il7r', 'Cd69', 'Itgae']

# Scores here are plain per-cell sums of X over the gene set (not sc.tl.score_genes)
tcm_total = adata[:, Tcm_genes].X.sum(1)
adata.obs['memory_score'] = tcm_total
trm_total = adata[:, Trm_genes].X.sum(1)
adata.obs['Trm_score'] = trm_total

# Individual genes first, then the two summed scores
sc.pl.umap(adata, color=['Ccr7', 'Cd2', 'Sell'], palette='tab20', cmap='coolwarm')

sc.pl.umap(adata, color=['memory_score', 'Trm_score'], palette='tab20', cmap='coolwarm')


## NK cells

In [None]:
# NK cell gene set (the '_act' suffix refers to the ACT database, see the heading above)
nk_act = [
    'Klrb1c', 'Ncr1', 'Klrg1', 'Cd27', 'Itgam', 'Gzma', 'Eomes',
    'Klrb1b', 'Klre1', 'Klrk1', 'Nkg7', 'Ccl5', 'Cited4', 'Commd10',
    'Dusp2', 'Il18rap', 'Itga2', 'Khdc1a', 'Klra1', 'Klra7', 'Klrb1a',
    'Klrd1', 'Mrpl28', 'Ntpcr', 'Prf1', 'Rhoc', 'S1pr5', 'Xcl1',
]
# Shorter alternative: ['Klrb1c', 'Ncr1', 'Fcgr3', 'Itgal']
sc.tl.score_genes(adata, nk_act, score_name='nk_act_score')
sc.pl.umap(
    adata,
    color=['nk_act_score'],
    palette='tab20',
    cmap='coolwarm',
)

### Naive B

In [None]:
# Gene sets for immature and naive mature B cells
immature_b_act = ['Ly6d']
naive_mature_b_act = ['Igkc', 'Cd40lg', 'Cr2', 'Fcer2a']

# Score every cell for each set; the result is stored in adata.obs[score_name]
sc.tl.score_genes(adata, immature_b_act, score_name='immature_b_act_score')
sc.tl.score_genes(adata, naive_mature_b_act, score_name='naive_mature_b_act_score')

sc.pl.umap(
    adata,
    color=['immature_b_act_score', 'naive_mature_b_act_score'],
    palette='tab20',
    cmap='coolwarm',
)


In [None]:
# Gene sets for naive and memory B cells
naiveB_genes = ['Ighd', 'Ighm']
memoryB_genes = ['Itgam', 'Cd80', 'Cxcr3', 'Nt5e', 'Pdcd1lg2']

# Scores are plain per-cell sums of X over each gene set
naive_total = adata[:, naiveB_genes].X.sum(1)
adata.obs['naiveB_score'] = naive_total
memory_total = adata[:, memoryB_genes].X.sum(1)
adata.obs['memoryB_score'] = memory_total

sc.pl.umap(
    adata,
    color=['naiveB_score', 'memoryB_score'],
    palette='tab20',
    cmap='coolwarm',
)


In [None]:
# Two alternative memory B cell gene sets
memoryB_genes2 = ['Aicda']
memoryB_genes3 = ['Cd27', 'Cd80', 'Cd86']

# Scores are plain per-cell sums of X over each gene set
aicda_total = adata[:, memoryB_genes2].X.sum(1)
adata.obs['memoryB_score2'] = aicda_total
costim_total = adata[:, memoryB_genes3].X.sum(1)
adata.obs['memoryB_score3'] = costim_total

# Colour scale capped at 3
sc.pl.umap(
    adata,
    color=['memoryB_score2', 'memoryB_score3'],
    palette='tab20',
    cmap='coolwarm',
    vmax=3,
)



## Age-associated B

In [None]:
# Age-associated B cell gene set.
# NOTE(review): 'Tbx2' — confirm this is not meant to be 'Tbx21', which the AAB_markers list further down also contains
ageB_genes = ['Ighm','Il10', 'Ifng', 'Itgax', 'Tbx2', 'Itgam', 'Fas']

# Score = per-cell sum of X over the gene set; colour scale capped at 6
adata.obs['ageB_score'] = adata[:,ageB_genes].X.sum(1)
sc.pl.umap(adata, color=['ageB_score'],palette='tab20',cmap='coolwarm',vmax=6)


In [None]:
# Extended marker list: the seven ageB_genes entries plus six further genes.
# NOTE(review): contains both 'Tbx2' and 'Tbx21' — confirm 'Tbx2' is intended
AAB_markers = ['Ighm', 'Il10', 'Ifng', 'Itgax', 'Tbx2', 'Itgam', 'Fas','Cd19', 'Cd38', 'Bcl6', 'Tbx21', 'Irf4', 'Cxcr5']

# Score = per-cell sum of X over the marker list; colour scale capped at 10
adata.obs['AAB_score'] = adata[:,AAB_markers].X.sum(1)
sc.pl.umap(adata, color=['AAB_score'],palette='tab20',cmap='coolwarm',vmax=10)


### Marginal zone and B-1 cells

In [None]:
# Marginal zone B cell marker genes
mzB_genes = ['Cd9','Cr2','Spib']

# The bare text "Zbtb20 1" / "Zbtb32" that stood here was not valid Python and
# stopped the cell from running; the two gene names are kept as a comment only.
# NOTE(review): they look like pasted marker-table rows — confirm they are not
# meant to be part of mzB_genes.
# Zbtb20, Zbtb32

# Score = per-cell sum of X over the gene set
adata.obs['mzB_score'] = adata[:,mzB_genes].X.sum(1)
# Plots the individual genes (not the summed score); colour scale capped at 2
sc.pl.umap(adata, color=mzB_genes,palette='tab20',cmap='coolwarm',vmax=2)

In [None]:
# Per-gene UMAPs for the B-1 cell gene list; colour scale capped at 1
B1_genes = ['Cd5','Spn','Ptpn22']
sc.pl.umap(adata, color=B1_genes,palette='tab20',cmap='coolwarm',vmax=1)

In [None]:
# B-1 gene set (the '_act' suffix refers to the ACT database, see the heading above)
b1_act = ['Zbtb20','Zbtb32']
# Score every cell for the set; the result is stored in adata.obs['b1_act_score']
sc.tl.score_genes(adata, b1_act, score_name='b1_act_score')

# Colour scale capped at 2
sc.pl.umap(adata, color='b1_act_score',palette='tab20',cmap='coolwarm', vmax=2)

### Common innate-like B cell markers

In [None]:
# Per-gene UMAPs for the common innate-like B cell markers (no cap on the colour scale)
innate_genes = ['Cd19','Cd24a','Cd38']
sc.pl.umap(adata, color=innate_genes,palette='tab20',cmap='coolwarm')

## Follicular B cell markers

In [None]:
# Follicular B cell markers, plus a second list named as "low" markers
# (presumably genes expected to be low in follicular B cells — TODO confirm)
folB_genes = ['Cd19', 'Cd79a', 'Ms4a1']
folB_low_genes = ['Prdm1', 'Xbp1']
# One row of per-gene UMAPs for each list; colour scale capped at 2
sc.pl.umap(adata, color=folB_genes,palette='tab20',cmap='coolwarm', vmax=2)
sc.pl.umap(adata, color=folB_low_genes,palette='tab20',cmap='coolwarm', vmax=2)

### DC

In [None]:
# Labels in the 'leiden' column that are treated as DC clusters
dc_clusters = ['r0','r1','r2','20','dc1r0','dc1r1','dc1r2','dc2r0','dc2r1','dc2r2','22','26']
# View (no .copy()) of the cells in those clusters; this rebinds adata_dc, which the
# marker-UMAP cell earlier in the notebook had defined from the low-resolution annotation
adata_dc = adata[adata.obs['leiden'].isin(dc_clusters)]

In [None]:
adata_dc

In [None]:
# Zbtb46 and Flt3 (both in common_cdc_markers) and Relb (in cdc2_markers) on the DC subset
sc.pl.umap(adata_dc, color=['Zbtb46','Flt3','Relb'],palette='tab20',cmap='coolwarm')

In [None]:
# cDC1 markers on the DC subset, two genes per row
for gene_pair in (
    ['Irf8', 'Batf3'],
    ['Xcr1', 'Rab43'],
    ['Cd24a', 'Cd8a'],
    ['Clec9a', 'Itgae'],
):
    sc.pl.umap(adata_dc, color=gene_pair, palette='tab20', cmap='coolwarm')

In [None]:

# Irf4, Sirpa and Esam (all in cdc2_markers) together with Tbx21, Klf4 and Ccr7,
# three genes per row; colour scale capped at 2
for gene_trio in (
    ['Irf4', 'Sirpa', 'Tbx21'],
    ['Esam', 'Klf4', 'Ccr7'],
):
    sc.pl.umap(adata_dc, color=gene_trio, palette='tab20', cmap='coolwarm', vmax=2)




In [None]:
# Itgam, Itgax and Cd4 on the DC subset; colour scale capped at 2
sc.pl.umap(adata_dc, color=['Itgam','Itgax','Cd4'],palette='tab20',cmap='coolwarm',vmax=2)
# Cst3, Wdfy4 and Ccr2 on the DC subset (no cap on the colour scale)
sc.pl.umap(adata_dc, color=['Cst3','Wdfy4','Ccr2'],palette='tab20',cmap='coolwarm')


In [None]:
# Cst3, Wdfy4 and Ccr2 on the DC subset, one row per genotype (wt first, then ko)
for genotype in ('wt', 'ko'):
    genotype_cells = adata_dc[adata_dc.obs['WT/KO']==genotype]
    sc.pl.umap(genotype_cells, color=['Cst3','Wdfy4','Ccr2'], palette='tab20', cmap='coolwarm')


In [None]:
# Cd40, Cd80 and Cd86 (the dc_activation_markers genes) on the DC subset,
# one row per condition; colour scale capped at 2
for condition in ('naive', 'pathogenic'):
    condition_cells = adata_dc[adata_dc.obs['pathogenicity']==condition]
    sc.pl.umap(condition_cells, color=['Cd40','Cd80','Cd86'], palette='tab20', cmap='coolwarm', vmax=2)


In [None]:
adata.obs['sample']

In [None]:
# Cd40, Cd80 and Cd86 in the two naive samples, one row per sample; colour scale capped at 2
for sample_name in ('wt_naive', 'ko_naive'):
    sample_cells = adata_dc[adata_dc.obs['sample']==sample_name]
    sc.pl.umap(sample_cells, color=['Cd40','Cd80','Cd86'], palette='tab20', cmap='coolwarm', vmax=2)


In [None]:
# Cd40, Cd80 and Cd86 in the two pathogenic samples, one row per sample; colour scale capped at 2
for sample_name in ('wt_pathogenic', 'ko_pathogenic'):
    sample_cells = adata_dc[adata_dc.obs['sample']==sample_name]
    sc.pl.umap(sample_cells, color=['Cd40','Cd80','Cd86'], palette='tab20', cmap='coolwarm', vmax=2)


In [None]:
# pDC-related genes on the DC subset, three per row. Apart from Bst1, all of
# these appear in the pDC_markers list; only the first row caps the colour scale at 2.
sc.pl.umap(adata_dc, color=['Ptprc','Bst1','Siglech'],palette='tab20',cmap='coolwarm',vmax=2)
sc.pl.umap(adata_dc, color=['Bst2','Tcf4','Zeb2'],palette='tab20',cmap='coolwarm')
sc.pl.umap(adata_dc, color=['Irf7','Pacsin1','Spib'],palette='tab20',cmap='coolwarm')


In [None]:
# Gene ref: Saito 2022, The role of type-2 conventional dendritic cells in the regulation of tumor immunity
dc1_saito = ['Xcr1','Itgax','Clec9a','Cd8a','Ly75']
dc1_act = ['Cadm1','Xcr1','Clec9a','Irf8']

dc1_shi_etal = ['Xcr1','Tlr3','Clec9a','Cadm1']

# Score every cell for each DC1 set; results are stored in adata.obs under score_name
sc.tl.score_genes(adata, dc1_saito, score_name='dc1_saito_score')
sc.tl.score_genes(adata, dc1_act, score_name='dc1_act_score')
sc.tl.score_genes(adata, dc1_shi_etal, score_name='dc1_shi_etal_score')

sc.pl.umap(adata, color=['dc1_shi_etal_score','dc1_act_score'],palette='tab20',cmap='coolwarm',vmax=1.5)

# Mouse pDC's express CD11c (Itgax) at low level. cDC2 express Itgax, Itgam and Sirpa
# Shi et al state that Sirpa is expressed in humans, and Cd11b (Itgam) in mice. We do observe Sirpa
dc2_saito = ['Itgax','Itgam','Sirpa']
dc2_act = ['Cd209a','Ccr6','Cd7','Clec10a','Clec12a','Ctnnd2','Dtx1','Epcam','Flt3','Il4i1','Ltb','Relb','Tcf7','Tnf','Zeb2']
# Union of both DC2 sets (they share no gene) as a plain list of str
dc2_combined = dc2_saito + dc2_act

sc.tl.score_genes(adata, dc2_saito, score_name='dc2_saito_score')
sc.tl.score_genes(adata, dc2_act, score_name='dc2_act_score')
# The combined score is computed from dc2_combined; it was previously computed
# from dc2_act, which made it a duplicate of 'dc2_act_score'
sc.tl.score_genes(adata, dc2_combined, score_name='dc2_combined_score')


sc.pl.umap(adata, color=['dc2_combined_score'],palette='tab20',cmap='coolwarm',vmax=0.8)


In [None]:
de_markers[0:10]

In [None]:
# Check markers in DC1: the DEGs can be used to separate subgroups. We will use Cst3 (Cystatin C, CyC)
#
# Ref: El-Sukkari et al., "The protease inhibitor cystatin C is differentially expressed among
# dendritic cell populations, but does not control antigen presentation",
# J Immunol. 2003 Nov 15;171(10):5003-11. doi: 10.4049/jimmunol.171.10.5003.

# Independent copy of the four DC1-related 'leiden' clusters
dc1_leiden_labels = ['20','dc1r0','dc1r1','dc1r2']
adata_DC1_dsubset = adata[adata.obs['leiden'].isin(dc1_leiden_labels)].copy()

# For each gene panel, one row for wt cells followed by one row for ko cells;
# vmin=0 pins the lower end of the colour scale
for gene_panel in (
    ['Cd8a','Cd4','Ccr2','Ccr7'],
    ['Tmsb4x','Cst3','Xcr1','Cd74'],
):
    for genotype in ('wt', 'ko'):
        genotype_cells = adata_DC1_dsubset[adata_DC1_dsubset.obs['WT/KO']==genotype]
        sc.pl.umap(genotype_cells, color=gene_panel, palette='tab20', cmap='coolwarm', vmin=0)


In [None]:
# Three subgroups within the CyC(low) DC1: restrict to the dc1r0/dc1r1/dc1r2 Leiden clusters.
_dc1_cyc_low = adata[adata.obs['leiden'].isin(['dc1r0', 'dc1r1', 'dc1r2'])]
sc.pl.umap(
    _dc1_cyc_low,
    color=['Cd8a', 'Xcr1', 'Ccr2', 'Wdfy4'],
    palette='tab20',
    cmap='coolwarm',
    vmin=0,
    vmax=2.5,
)

In [None]:
### The Beutler lab studies Relb(low) cDC2 cells. We also have a subset of the DC1 cluster that is Cd8a- Xcr1- Ccr2+
sc.pl.umap(
    adata,
    color=['Ccr2', 'Wdfy4'],
    palette='tab20',
    cmap='coolwarm',
    vmax=3,
)

In [None]:
# Monocyte-derived DC markers (Itgam = Cd11b, Fcgr1 = Cd64, Csf1r = Cd115, Adgre1 = F4/80).
# Ref: Elodie Segura, Alice Coillard. Antigen presentation by mouse monocyte-derived cells:
# Re-evaluating the concept of monocyte-derived dendritic cells.
# Molecular Immunology, 2021, 135, pp.165-169. doi:10.1016/j.molimm.2021.04.012 (inserm-03381917)

modc = ['Ccr2', 'Itgam', 'Fcgr1', 'Csf1r', 'Adgre1']

# Store the signature score in adata.obs['modc_score'], then plot it on the UMAP.
sc.tl.score_genes(adata, modc, score_name='modc_score')
sc.pl.umap(
    adata,
    color=['modc_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=0.8,
)

In [None]:
# pDC score: per-cell sum of adata.X over the three marker genes.
pdc_saito = ['Ptprc', 'Bst2', 'Siglech']

_pdc_expr = adata[:, pdc_saito].X
adata.obs['pdc_score_saito'] = _pdc_expr.sum(1)

sc.pl.umap(
    adata,
    color=['pdc_score_saito'],
    palette='tab20',
    cmap='coolwarm',
)


In [None]:

# Alternative pDC list that was considered (not used):
# pdc_genes =['Cd209a','Lag3','Lifr','Tcf4', 'Zeb2', 'Pacsin1', 'Spib']
pdc_act = ['Pacsin1', 'Siglech', 'Bst2', 'Ccr9', 'D13Ertd608e', 'Tcf4']
migratorydc_act = ['Bcl2l14', 'Cacnb3', 'Ccl22', 'Fscn1']

# Score both signatures (pDC first, migratory DC second) and plot them side by side.
for _sig_genes, _sig_column in (
    (pdc_act, 'pdc_act_score'),
    (migratorydc_act, 'migratorydc_act_score'),
):
    sc.tl.score_genes(adata, _sig_genes, score_name=_sig_column)

sc.pl.umap(
    adata,
    color=['pdc_act_score', 'migratorydc_act_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=2,
)


In [None]:
## Division into cross-presenting (Xcr1+) and Sirpa+ DC
## See Gurka et al, Front. Immunol., 04 February 2015, Sec. Antigen Presenting Cell Biology
## Volume 6 - 2015 | https://doi.org/10.3389/fimmu.2015.00035

cdc1_genes1 = ['Cd8a', 'Clec9a', 'Batf3', 'Xcr1']
cdc1_genes2 = ['Cd8a', 'Clec9a', 'Batf3', 'Sirpa']


def _max_scaled_expression(gene):
    """Return the per-cell value of `gene` in adata.X divided by its maximum over all cells."""
    per_cell = adata[:, gene].X.sum(1)
    return np.asarray(per_cell / np.max(per_cell))


_scaled = {g: _max_scaled_expression(g) for g in ('Cd8a', 'Clec9a', 'Batf3', 'Xcr1', 'Sirpa')}

# Both scores share the Cd8a * Clec9a * Batf3 product; they differ in which of Xcr1 / Sirpa
# enters directly and which enters as (1 - scaled expression).
_shared_product = _scaled['Cd8a'] * _scaled['Clec9a'] * _scaled['Batf3']

adata.obs['dc_xcr_score'] = _shared_product * _scaled['Xcr1'] * (1. - _scaled['Sirpa'])
adata.obs['dc_sirpa_score'] = _shared_product * (1. - _scaled['Xcr1']) * _scaled['Sirpa']

for _dc_score in ('dc_xcr_score', 'dc_sirpa_score'):
    sc.pl.umap(adata, color=[_dc_score], palette='tab20', cmap='coolwarm', vmax=0.1)


In [None]:
### Activated cDCs
acdc_genes = ['Itgax', 'Cd83', 'H2-Aa', 'H2-Ab1', 'H2-Eb1']

# Score = per-cell sum of adata.X over the marker genes.
_acdc_expr = adata[:, acdc_genes].X
adata.obs['acdc_score'] = _acdc_expr.sum(1)

# Individual genes first, then the summed score.
for _acdc_color in (acdc_genes, ['acdc_score']):
    sc.pl.umap(adata, color=_acdc_color, palette='tab20', cmap='coolwarm')

In [None]:
# Two gene pairs, each drawn as its own figure with the colour scale capped at 2.
for _tf_pair in (['Zbtb46', 'Batf3'], ['Irf4', 'Irf8']):
    sc.pl.umap(adata, color=_tf_pair, palette='tab20', cmap='coolwarm', vmax=2)

In [None]:
## Monocyte-derived DC
mddc_genes1 = ['Itgam', 'Itgax']
mddc_genes2 = ['Il12a', 'Ly6g']

# One figure per gene list.
for _mddc_panel in (mddc_genes1, mddc_genes2):
    sc.pl.umap(adata, color=_mddc_panel, palette='tab20', cmap='coolwarm')


### Monocytes

In [None]:
# Two monocyte signatures, scored into adata.obs and plotted side by side.
clmono_act = ['Ccl9', 'Ccr2', 'Cd68', 'Ly6c2']
nclmono_act = ['Csf1r', 'Cx3cr1', 'Fabp4']

for _mono_genes, _mono_column in (
    (clmono_act, 'clmono_act_score'),
    (nclmono_act, 'nclmono_act_score'),
):
    sc.tl.score_genes(adata, _mono_genes, score_name=_mono_column)

sc.pl.umap(
    adata,
    color=['clmono_act_score', 'nclmono_act_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=2,
)

### Macrophages

In [None]:
# Macrophage signature from PanglaoDB
macrophage_panglaodb = ['Cd68', 'Fcgr1', 'Naaa', 'Lyz2', 'Ccl12']

# Splenic macrophage signature ("act" list)
splenic_macrophage_act = [
    'Adgre1', 'Cd68', 'Chil3', 'Itgam', 'Ly6g',
    'Ace', 'Adgre5', 'Blvrb', 'C1qa', 'C1qb',
    'C1qc', 'Cd19', 'Cd3d', 'Cd3e', 'Cd3g',
    'Cd7', 'Cd86', 'Cd8a', 'Cebpb', 'Crip2',
    'Csf1r', 'Cxcl10', 'Fcgr1', 'Ftl1-ps1', 'Gatm',
    'Gbp2', 'Hbb-bt', 'Ifi27l2a', 'Ifitm6', 'Klrb1c',
    'Lipc', 'Ly6c1', 'Nop16', 'Pf4', 'Plac8',
]

# Score PanglaoDB first, then the splenic list; each result lands in its own adata.obs column.
for _mac_genes, _mac_column in (
    (macrophage_panglaodb, 'macrophage_panglaodb_score'),
    (splenic_macrophage_act, 'splenic_macrophage_act_score'),
):
    sc.tl.score_genes(adata, _mac_genes, score_name=_mac_column)

sc.pl.umap(
    adata,
    color=['macrophage_panglaodb_score', 'splenic_macrophage_act_score'],
    palette='tab20',
    cmap='coolwarm',
)


In [None]:
# Macrophage polarisation gene lists taken from Biocompare
M1_biocompare = ['Il1a', 'Il1b', 'Il6', 'Nos2', 'Tlr2', 'Tlr4', 'Cd80', 'Cd86']
M2_biocompare = ['Csf1r', 'Mrc1', 'Pparg', 'Arg1', 'Cd163', 'Clec10a', 'Clec7a', 'Pdcd1lg2', 'Retnla']
TAM_biocompare = ['Ccr2', 'Csf1r', 'Marco', 'Pdcd1lg2', 'Cd40', 'Ccl2', 'Csf1', 'Fcgr3', 'Pdgfb']

# Score the lists in the order M1, M2, TAM.
for _bc_genes, _bc_column in (
    (M1_biocompare, 'M1_biocompare_score'),
    (M2_biocompare, 'M2_biocompare_score'),
    (TAM_biocompare, 'TAM_biocompare_score'),
):
    sc.tl.score_genes(adata, _bc_genes, score_name=_bc_column)

sc.pl.umap(
    adata,
    color=['M1_biocompare_score', 'M2_biocompare_score', 'TAM_biocompare_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=0.75,
)


### Plasma cells

In [None]:
# Plasma-cell score: per-cell sum of adata.X over the four marker genes.
plasma_genes = ['Jchain', 'Sdc1', 'Prdm1', 'Xbp1']
adata.obs['plasma_score'] = adata[:, plasma_genes].X.sum(1)

# Colour scale capped at 3 so that a few very high cells do not wash out the rest.
sc.pl.umap(adata, color=['plasma_score'], palette='tab20', cmap='coolwarm', vmax=3.)

### Mast cells

In [None]:
# Mast-cell signatures: "pdb" list and the ACT pan-tissue list.
mast_genes_pdb = ['Kit', 'Hsd11b1', 'Tpsab1', 'Il1rl1', 'Hdc', 'Slc29a1']
mast_genes_act_pan_tissue = [
    'Mcpt8', 'Cpa3', 'Fcer1a', 'Gata2', 'Ms4a2',
    'Prss34', 'Ccl3', 'Ifitm1', 'Il6', 'Lmo4',
    'Cd200r3', 'Ccl4', 'Cd69', 'Hdc', 'Hgf',
    'Il4', 'Itga2', 'Ly6e', 'Srgn',
]

for _mast_genes, _mast_column in (
    (mast_genes_pdb, 'mast_score_pdb'),
    (mast_genes_act_pan_tissue, 'mast_score_act'),
):
    sc.tl.score_genes(adata, _mast_genes, score_name=_mast_column)

# According to ACT, Mcpt8 is a main mast cell marker
sc.pl.umap(
    adata,
    color=['mast_score_pdb', 'mast_score_act'],
    palette='tab20',
    cmap='coolwarm',
    vmax=2,
)


### Progenitor cells

In [None]:
# Progenitor score: per-cell sum of adata.X over the marker genes.
progenitor_genes = ['Cxcr5', 'Id3', 'Slamf6', 'Tcf7']

_progenitor_expr = adata[:, progenitor_genes].X
adata.obs['progenitor_score'] = _progenitor_expr.sum(1)

sc.pl.umap(
    adata,
    color=['progenitor_score'],
    palette='tab20',
    cmap='coolwarm',
)

### Neutrophils

In [None]:
# Individual neutrophil markers, shown as two separate figures.
neutrophil_genes1 = ['Csf3r', 'Ly6g']
neutrophil_genes2 = ['S100a8', 'Il1r2']
for _neutro_panel in (neutrophil_genes1, neutrophil_genes2):
    sc.pl.umap(adata, color=_neutro_panel, palette='tab20', cmap='coolwarm')

# Neutrophil signature ("act" list), scored into adata.obs['neutrophil_act_score'].
neutrophil_act = [
    'Ly6g', 'Cxcr2', 'S100a8', 'Apobr', 'Ccl6',
    'Cxcl2', 'Cxcr4', 'Hdc', 'Il1b', 'Isg15',
    'Notch2', 'Retnlg', 'Rsad2', 'S100a9', 'Wfdc21',
]

sc.tl.score_genes(adata, neutrophil_act, score_name='neutrophil_act_score')

sc.pl.umap(
    adata,
    color=['neutrophil_act_score'],
    palette='tab20',
    cmap='coolwarm',
)


### Basophils

In [None]:
# Short basophil marker list (defined here but not scored in this cell).
basophil_genes = ['Fcer1a', 'Cpa3', 'Ms4a2', 'Gata2', 'Il3ra', 'Ccr3', 'Hdc']

# NOTE(review): this list is gene-for-gene identical to mast_genes_act_pan_tissue,
# so basophil_act_score and mast_score_act are built from the same genes - confirm this is intended.
basophil_act_pan_tissue = [
    'Mcpt8', 'Cpa3', 'Fcer1a', 'Gata2', 'Ms4a2',
    'Prss34', 'Ccl3', 'Ifitm1', 'Il6', 'Lmo4',
    'Cd200r3', 'Ccl4', 'Cd69', 'Hdc', 'Hgf',
    'Il4', 'Itga2', 'Ly6e', 'Srgn',
]

sc.tl.score_genes(adata, basophil_act_pan_tissue, score_name='basophil_act_score')

sc.pl.umap(
    adata,
    color=['basophil_act_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=2,
)

### Erythrophagocytic macrophages

In [None]:
# The small cluster with a high basophil score could also be erythrophagocytic macrophages.
# Check their markers.
ephM_genes = ['Cd68', 'Adgre1', 'Mrc1']
sc.tl.score_genes(adata, ephM_genes, score_name='erythrophagocytic_macrophage_score')

# Splenic macrophage signature, redefined here so that this cell can run on its own.
splenic_macrophage_act = [
    'Adgre1', 'Cd68', 'Chil3', 'Itgam', 'Ly6g',
    'Ace', 'Adgre5', 'Blvrb', 'C1qa', 'C1qb',
    'C1qc', 'Cd19', 'Cd3d', 'Cd3e', 'Cd3g',
    'Cd7', 'Cd86', 'Cd8a', 'Cebpb', 'Crip2',
    'Csf1r', 'Cxcl10', 'Fcgr1', 'Ftl1-ps1', 'Gatm',
    'Gbp2', 'Hbb-bt', 'Ifi27l2a', 'Ifitm6', 'Klrb1c',
    'Lipc', 'Ly6c1', 'Nop16', 'Pf4', 'Plac8',
]
sc.tl.score_genes(adata, splenic_macrophage_act, score_name='splenic_macrophage_act_score')

# Confirms the hypothesis
sc.pl.umap(
    adata,
    color=['erythrophagocytic_macrophage_score', 'splenic_macrophage_act_score'],
    palette='tab20',
    cmap='coolwarm',
)

In [None]:
### Activated macrophages
# Use gene symbols only. 'F4/80' and 'Cd11b' are protein names, not gene symbols, so the
# presence filter below would silently drop them. Elsewhere in this notebook F4/80 is
# queried as 'Adgre1' and Cd11b as 'Itgam' (already in the list).
actM_genes = [
    'Cd80', 'Cd86', 'H2-Ab1', 'H2-DMb1', 'Itgam',
    'Tnf', 'Il1b', 'Il6', 'Ccl2',
    'Adgre1', 'Cd68',
    'Arg1', 'Nos2', 'Irf5', 'Cd274',
]

# Smaller alternative list that was considered:
# actM_genes = [
#     'Cd80', 'Cd86', 'Itgam',
#     'Cd68', 'Cd14']

# Keep only genes present in the dataset, and report the ones that are skipped so that
# a misspelt or alias name does not vanish unnoticed.
_actM_missing = [gene for gene in actM_genes if gene not in adata.var_names]
if _actM_missing:
    print("actM genes not found in adata.var_names (skipped):", _actM_missing)
actM_genes = [gene for gene in actM_genes if gene in adata.var_names]

# Score = per-cell sum of adata.X over the retained genes.
adata.obs['actM_score'] = adata[:, actM_genes].X.sum(1)

# Plot the score on the UMAP
sc.pl.umap(adata, color='actM_score', cmap='coolwarm')

# Show the plot
plt.show()

In [None]:
### Cluster r2: B cell lineage? No
# Cluster 19, with high ribosomal activity, is on the other hand clearly B cells

sc.pl.umap(
    adata,
    color=['Cd19', 'Ms4a1', 'Cd79a'],
    palette='tab20',
    cmap='coolwarm',
)

### Progenitor cells

In [None]:
# Progenitor score (same gene list as the earlier progenitor cell): per-cell sum of adata.X.
progenitor_genes = ['Cxcr5', 'Id3', 'Slamf6', 'Tcf7']

_progenitor_counts = adata[:, progenitor_genes].X
adata.obs['progenitor_score'] = _progenitor_counts.sum(1)

sc.pl.umap(
    adata,
    color=['progenitor_score'],
    palette='tab20',
    cmap='coolwarm',
)

### NKT cells

In [None]:
# Zbtb16 and Klrb1c expression, colour scale capped at 2.
sc.pl.umap(
    adata,
    color=['Zbtb16', 'Klrb1c'],
    palette='tab20',
    cmap='coolwarm',
    vmax=2,
)

In [None]:
NKT_genes = ['Ncam1', 'Gata3', 'Il2rb']

# Per-cell sum of adata.X over the NKT genes.
# NOTE(review): the score is stored but this cell plots the individual genes, not NKT_score.
_nkt_expr = adata[:, NKT_genes].X
adata.obs['NKT_score'] = _nkt_expr.sum(1)

sc.pl.umap(
    adata,
    color=NKT_genes,
    palette='tab20',
    cmap='coolwarm',
    vmax=0.5,
)

In [None]:
# Invariant alpha chain genes for NKT cells (first figure), then Zbtb16 / Tbx21 (second figure).
# Each panel has its own colour-scale cap.
for _nkt_panel, _nkt_vmax in ((['Trav11', 'Traj18'], 0.5), (['Zbtb16', 'Tbx21'], 2)):
    sc.pl.umap(adata, color=_nkt_panel, palette='tab20', cmap='coolwarm', vmax=_nkt_vmax)

In [None]:
# Common NKT β chain genes
sc.pl.umap(
    adata,
    color=['Trbv1', 'Trbv13-2', 'Trbv29'],
    palette='tab20',
    cmap='coolwarm',
    vmax=1,
)

### Gamma delta T cells

In [None]:
# Candidate γδ T cell genes under several naming schemes; only those present in
# adata.var_names are used for the score.
gamma_delta_genes = [
    'Tcrg-V4', 'Tcrg-V6', 'Tcrg-V1', 'Tcrg', 'Tcrd',
    'Trgv1', 'Trgv2', 'Trgv3', 'Trgv4', 'Trgv5', 'Trgv6', 'Trgv7',
    'Trdv1', 'Trdv3', 'Trdv4', 'Trdv5',
    'Trg', 'Trd', 'Sox13', 'Id3', 'Blk', 'Il17a', 'Ifng',
]
present_gamma_delta_genes = [gene for gene in gamma_delta_genes if gene in adata.var_names]
# The label previously contained mis-decoded bytes instead of the Greek letters.
print("Present γδ T cell genes:", present_gamma_delta_genes)

# Score = per-cell sum of adata.X over the genes that were found.
adata.obs['gamma_delta_score'] = adata[:, present_gamma_delta_genes].X.sum(1)
sc.pl.umap(adata, color=['gamma_delta_score'], palette='tab20', cmap='coolwarm', vmax=4)

In [None]:
# gamma delta T markers: bind the list to a name and reuse it for the plot
# (a bare list literal on its own line does nothing).
gamma_delta_markers = ['Tcrg-V4', 'Tcrg-V6', 'Tcrg-V1']
sc.pl.umap(adata, color=gamma_delta_markers, palette='tab20', cmap='coolwarm', vmax=0.5)

### Cancer cells

In [None]:
# Cancer-cell score: per-cell sum of adata.X over the three marker genes.
cancer_genes = ['Mlana', 'Tyr', 'Pmel']
_cancer_expr = adata[:, cancer_genes].X
adata.obs['cancer_cell_score'] = _cancer_expr.sum(1)
# Idea kept for reference (not applied):
#(np.max(adata[:,['Ptprc']].X) - adata[:,['Ptprc']].X.todense())

# Cancer stem cell score: a single gene (Cd34).
cancer_stem_cell_genes = ['Cd34']
_cancer_stem_expr = adata[:, cancer_stem_cell_genes].X
adata.obs['cancer_stem_cell_score'] = _cancer_stem_expr.sum(1)

# Plot only cells whose 'pathogenicity' is 'pathogenic'.
_pathogenic_cells = adata[adata.obs['pathogenicity'] == 'pathogenic']
sc.pl.umap(
    _pathogenic_cells,
    color=['cancer_cell_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=0.5,
)

In [None]:
# Ptprc and the cancer stem cell score in cells whose 'pathogenicity' is 'pathogenic'.
_pathogenic_view = adata[adata.obs['pathogenicity'] == 'pathogenic']
sc.pl.umap(
    _pathogenic_view,
    color=['Ptprc', 'cancer_stem_cell_score'],
    palette='tab20',
    cmap='coolwarm',
)

In [None]:
# Same panel for cells whose 'pathogenicity' is 'naive'.
_naive_view = adata[adata.obs['pathogenicity'] == 'naive']
sc.pl.umap(
    _naive_view,
    color=['Ptprc', 'cancer_stem_cell_score'],
    palette='tab20',
    cmap='coolwarm',
)

### Germinal center B cells

In [None]:
# Cluster r3, next to the main B cell cluster, is splenic germinal center B cells
gcB_genes = ['Bcl6', 'Aicda', 'Fas']
sc.pl.umap(
    adata,
    color=gcB_genes,
    palette='tab20',
    cmap='coolwarm',
    vmax=2,
)