## Annotation of cells from LN samples

In [None]:
import numpy as np
import seaborn as sns

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.legend import Legend
import matplotlib.colors as colors
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import pandas as pd
import scipy
import scanpy as sc
import anndata as ad

from sklearn import datasets
from sklearn.decomposition import PCA

from numba import jit

import celltypist
from celltypist import models

from matplotlib.cm import ScalarMappable

In [None]:
# Custom 25-colour categorical colormap ('op_tab25'), used as palette for the Leiden plots.

from matplotlib.colors import ListedColormap

try:
    # Legacy helper that newer Matplotlib releases no longer provide. Nothing in
    # this cell uses it, so a missing symbol must not abort the cell.
    from matplotlib.cm import register_cmap
except ImportError:
    register_cmap = None

tab20b = matplotlib.colormaps['tab20b']
tab20c = matplotlib.colormaps['tab20c']
# 9 samples from the upper part of tab20b and 16 from the lower part of tab20c
colors1 = tab20b(np.linspace(3.001/5., 1, 9))
colors2 = tab20c(np.linspace(0, 3.999/5., 16))

# NOTE(review): this rebinds `colors`, hiding the `matplotlib.colors` module alias
# imported at the top of the notebook. The name is kept so later cells see the
# same value as before.
colors = np.concatenate([colors1, colors2])

map_name = 'op_tab25'
op_cmap = ListedColormap(colors, name=map_name)
# force=True allows re-running this cell without a "already registered" ValueError
matplotlib.colormaps.register(name=map_name, cmap=op_cmap, force=True)

In [None]:
# Global scanpy figure defaults for this notebook (600 dpi, PDF output)
figure_defaults = dict(
    scanpy=True,
    dpi=600,
    dpi_save=600,
    frameon=True,
    vector_friendly=True,
    fontsize=14,
    figsize=(9, 8),
    format='pdf',
    facecolor=None,
    transparent=False,
    ipython_format='png2x',
)
sc.set_figure_params(**figure_defaults)

In [None]:
# Tissue label reused in the plot titles below
tissue = 'LN'
# Integrated lymph node dataset
adata = sc.read("maranou_032024_LN_integrated.h5ad")

In [None]:
# Build per-cell names of the form '<sample>_<current index>' ...
sample_labels = adata.obs['sample'].astype(str)
current_names = adata.obs.index.astype(str)
adata.obs['unique_cell_name'] = sample_labels + '_' + current_names

# ... use them as the obs index ...
adata.obs.index = adata.obs['unique_cell_name']

# ... and remove the helper column again, since the index now carries the names
adata.obs.drop(columns='unique_cell_name', inplace=True)

In [None]:
# Two-pass Leiden clustering: cluster all cells, re-cluster selected clusters on
# their own, and merge both results into adata.obs['leiden'] (refined clusters
# are prefixed with 'r').

# 1. Initial clustering of all cells
sc.tl.leiden(adata, resolution=0.95, key_added='leiden_initial')

# 2. Clusters to refine (the ones containing DC2, monocytes and macrophages)
target_clusters = ['16','18']

# 3. Subset the data to those clusters
adata_subset = adata[adata.obs['leiden_initial'].isin(target_clusters)].copy()

# 4. Recompute the neighborhood graph on the subset
sc.pp.neighbors(adata_subset)

# 5. Cluster the subset separately
sc.tl.leiden(adata_subset, resolution=0.7, key_added='leiden_refined')

# 6. Category names of the combined clustering: initial clusters minus the
#    refined ones, plus the refined clusters renamed to 'r<k>'
initial_categories = list(adata.obs['leiden_initial'].cat.categories)
refined_categories = list(adata_subset.obs['leiden_refined'].cat.categories)
initial_categories_filtered = [cat for cat in initial_categories if cat not in target_clusters]
refined_categories_renamed = [f'r{cat}' for cat in refined_categories]
combined_categories = initial_categories_filtered + refined_categories_renamed

# 7. Per-cell labels: start from the initial labels and overwrite the refined
#    cells in one vectorized assignment (instead of a Python loop over cells).
#    Values are written positionally, in the row order of adata_subset.
combined_labels = adata.obs['leiden_initial'].astype(str)
combined_labels.loc[adata_subset.obs.index] = (
    'r' + adata_subset.obs['leiden_refined'].astype(str)
).to_numpy()

# 8. Store as a categorical with sorted categories for readability
adata.obs['leiden'] = pd.Categorical(
    combined_labels,
    categories=sorted(combined_categories)
)

In [None]:
# UMAP of all cells, coloured by the combined Leiden clustering
sc.pl.umap(
    adata,
    color=['leiden'],
    title=[f'{tissue} Leiden'],
    palette='op_tab25',
    legend_loc='on data',
)


In [None]:
# t-SNE of all cells, coloured by the combined Leiden clustering.
# NOTE(review): two titles are passed for a single colour key — confirm the second is still wanted.
tsne_titles = [f'{tissue} Leiden', f'Naive {tissue} WT/KO']
sc.pl.tsne(
    adata,
    color=['leiden'],
    title=tsne_titles,
    palette='op_tab25',
    legend_loc='on data',
)


In [None]:
# 2x2 grid of UMAPs, one panel per sample, coloured by Leiden cluster
ncols = 2
nrows = 2
figsize = 8
wspace = 0.1
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, nrows * figsize))
plt.subplots_adjust(wspace=wspace)

# (sample id, title prefix, grid position) for each panel
umap_panels = [
    ('wt_naive', 'Naive WT ', (0, 0)),
    ('wt_pathogenic', 'Pathogenic WT ', (0, 1)),
    ('ko_naive', 'Naive Cd74 KO ', (1, 0)),
    ('ko_pathogenic', 'Pathogenic Cd74 KO ', (1, 1)),
]
for panel_no, (sample_id, title_prefix, grid_pos) in enumerate(umap_panels):
    # Every panel but the last passes show=False; the last call keeps scanpy's default
    show_kwargs = {'show': False} if panel_no < len(umap_panels) - 1 else {}
    sc.pl.umap(
        adata[adata.obs['sample'] == sample_id],
        ax=axs[grid_pos],
        color=['leiden'],
        title=[title_prefix + str(tissue) + ' Leiden'],
        palette='op_tab25',
        legend_loc='on data',
        **show_kwargs,
    )


In [None]:
# 2x2 grid of t-SNE plots, one panel per sample, coloured by Leiden cluster
ncols = 2
nrows = 2
figsize = 8
wspace = 0.1
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, nrows * figsize))
plt.subplots_adjust(wspace=wspace)

# (sample id, title prefix, grid position) for each panel
tsne_panels = [
    ('wt_naive', 'Naive WT ', (0, 0)),
    ('wt_pathogenic', 'Pathogenic WT ', (0, 1)),
    ('ko_naive', 'Naive Cd74 KO ', (1, 0)),
    ('ko_pathogenic', 'Pathogenic Cd74 KO ', (1, 1)),
]
for panel_no, (sample_id, title_prefix, grid_pos) in enumerate(tsne_panels):
    # Every panel but the last passes show=False; the last call keeps scanpy's default
    show_kwargs = {'show': False} if panel_no < len(tsne_panels) - 1 else {}
    sc.pl.tsne(
        adata[adata.obs['sample'] == sample_id],
        ax=axs[grid_pos],
        color=['leiden'],
        title=[title_prefix + str(tissue) + ' Leiden'],
        palette='op_tab25',
        legend_loc='on data',
        **show_kwargs,
    )


### CellTypist: initial guesses for cell types

In [None]:

import celltypist
from celltypist import models

# Work on a copy so that renaming genes and normalizing does not touch `adata`
adata_human = adata.copy()

# Mouse-human gene conversion table (homology report downloaded from MGI)
mouse_human_genes = pd.read_csv("http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt",sep="\t", index_col=False)
mouse = mouse_human_genes[mouse_human_genes['Common Organism Name']=='mouse, laboratory']
human = mouse_human_genes[mouse_human_genes['Common Organism Name']=='human']

# Keep only the columns needed for the conversion
mouse = mouse[['DB Class Key', 'Symbol']].reset_index(drop=True)
human = human[['DB Class Key', 'Symbol']].reset_index(drop=True)

# Lookup tables built once instead of scanning both tables for every gene.
# drop_duplicates keeps the first row per key, matching a "first match wins" scan.
mouse_symbol_to_key = mouse.drop_duplicates('Symbol').set_index('Symbol')['DB Class Key'].to_dict()
human_key_to_symbol = human.drop_duplicates('DB Class Key').set_index('DB Class Key')['Symbol'].to_dict()

# Convert from mouse to human symbols; genes without a mouse entry or without a
# human counterpart keep their original name.
gene_list = np.array([
    human_key_to_symbol.get(mouse_symbol_to_key.get(symbol), symbol)
    for symbol in adata_human.var_names
])

# NOTE(review): normalize_per_cell is a legacy scanpy API — consider
# sc.pp.normalize_total(adata_human, target_sum=1e4) after checking that results match.
sc.pp.normalize_per_cell(adata_human, counts_per_cell_after=1e4)
sc.pp.log1p(adata_human)
# Back-transformed per-cell totals; the result is not stored or displayed here
np.expm1(adata_human.X).sum(1)

adata_human.var_names = gene_list
predictions = celltypist.annotate(adata_human, model = 'Immune_All_Low.pkl', majority_voting = True)
print(predictions.predicted_labels)
adata_human = predictions.to_adata()

sc.pl.umap(adata_human, color = 'majority_voting', palette='tab20',  legend_loc = 'on data', legend_fontsize=12, legend_fontweight='medium')


### DEGs in Leiden clusters

In [None]:
# Wilcoxon rank-sum test of every Leiden cluster against the rest
sc.tl.rank_genes_groups(adata, 'leiden', method='wilcoxon')
# sc.pl.rank_genes_groups(adata, n_genes=20, sharey=False)

# Keep markers with adjusted p-value < 0.03 and log fold change > 1
de_markers = sc.get.rank_genes_groups_df(adata, None)
passes_pvalue = de_markers.pvals_adj < 0.03
passes_lfc = de_markers.logfoldchanges > 1.0
de_markers = de_markers[passes_pvalue & passes_lfc]


In [None]:
# Display the table of filtered marker genes
de_markers

In [None]:
# Lower the resolution to 300 dpi for the plots that follow
figure_defaults = dict(
    scanpy=True,
    dpi=300,
    dpi_save=300,
    frameon=True,
    vector_friendly=True,
    fontsize=14,
    figsize=(9, 8),
    format='pdf',
    facecolor=None,
    transparent=False,
    ipython_format='png2x',
)
sc.set_figure_params(**figure_defaults)

In [None]:
# Top 20 ranked genes per cluster, each panel with its own y axis
sc.pl.rank_genes_groups(
    adata,
    n_genes=20,
    sharey=False,
)


In [None]:
# Give cluster name to see its DEGs compared to other clusters:
# the 50 filtered markers with the highest score are displayed.
cluster = '10'
cluster_markers = de_markers[de_markers['group'] == cluster]
cluster_markers.sort_values('scores', ascending=False).head(50)


In [None]:
# Leiden UMAP again, as a reference while assigning annotations
sc.pl.umap(
    adata,
    color=['leiden'],
    title=[f'{tissue} Leiden'],
    palette='op_tab25',
    legend_loc='on data',
)


In [None]:
# Leiden refined annotations: cluster label -> cell type name.
# Several clusters intentionally share one name (e.g. "2" and "6" are both "CD8+").
# Backslashes in the mathtext label are escaped explicitly; '\g' and '\d' are
# invalid escape sequences in a normal string literal.

annotation_dict = {
    "0": "Antigen-presenting B cells",
    "1": "Naive B cells",  # high expression of MHC Class II genes (H2-Aa, H2-Ab1, H2-Eb1) and Cd74
    "2": "CD8+",
    "3": "Activated B cells/Early plasmablasts",
    "4": "Developing B cells",
    "5": "Activated CD8+",  # These are Ctla4-
    "6": "CD8+",
    "7": "CTLA4+ CD8+ T cells",
    "8": "CD4+",
    "9": "Treg",
    # "10": "CD8-CD4-CD3E- T cell-like",  # Have T cell lineage but some are CD8- CD4- CD3E-
    "10": "Heterogeneous T cells",
    "11": "NK cells",
    "12": "Th cells",
    "13": "MZB and B-1 cells",
    "14": "Interferon-activated B cells",
    "15": "Activated CD4+",
    "17": "Activated CD4+",
    "19": "$\\gamma \\delta$T cells",
    "20": "CCR7hi CD40+ CD80+ CD86+ DC",
    "21": "pDC",
    "22": "Plasma cells",
    "r0": "Monocytes and macrophages",  # Probably Classical monocytes
    "r1": "cDC2",
    "r2": "Lymphoid-resident cDC1",  # XCR1+ [Gurka et al]
    "r3": "Undefined DC",  # [Gurka et al]
    "r4": "CCR7hi CD40+ CD80- CD86- DC",  # Properties of both cDC and migratory
    "r5": "cDC2",
    "r6": "Plasma cells",  # Previously "High proliferation/synthesis". Marked by Rpl/Rps hi
    "r7": "Mast cells",
    "r8": "Neutrophils",
    "r9": "Monocytes and macrophages",  # Probably Non-classical monocytes
}

# Cell type name -> RGB colour used in all annotated plots.
# NOTE(review): "CD4+" and "Activated CD4+" share 'greenyellow', and the entries
# "CyC(low) DC1" and "High proliferation/synthesis" are not produced by
# annotation_dict above — confirm both are intended.
ann_colors = plt.colormaps['tab20'].colors
ann_palette = {
    "Antigen-presenting B cells": ann_colors[18],
    "Naive B cells": ann_colors[0],
    "CD8+": ann_colors[4],
    "Activated B cells/Early plasmablasts": ann_colors[13],
    "Developing B cells": plt.matplotlib.colors.to_rgb('dodgerblue'),
    "Activated CD8+": ann_colors[2],
    "CD4+": plt.matplotlib.colors.to_rgb('greenyellow'),
    "Treg": ann_colors[16],
    "CTLA4+ CD8+ T cells": ann_colors[3],
    # "CD8-CD4-CD3E- T cell-like": ann_colors[15],
    "Heterogeneous T cells": ann_colors[15],
    "NK cells": ann_colors[17],
    "Th cells": plt.matplotlib.colors.to_rgb('mediumseagreen'),
    "MZB and B-1 cells": ann_colors[1],
    "Interferon-activated B cells": ann_colors[9],
    "Activated CD4+": plt.matplotlib.colors.to_rgb('greenyellow'),
    "$\\gamma \\delta$T cells": ann_colors[5],
    "CCR7hi CD40+ CD80+ CD86+ DC": ann_colors[12],
    "pDC": ann_colors[6],
    "Plasma cells": ann_colors[19],
    "Monocytes and macrophages": ann_colors[7],  # Probably Classical monocytes
    "Lymphoid-resident cDC1": plt.matplotlib.colors.to_rgb('palevioletred'),
    "Undefined DC": plt.matplotlib.colors.to_rgb('mediumvioletred'),
    "CyC(low) DC1": ann_colors[0],
    "cDC2": plt.matplotlib.colors.to_rgb('darkorchid'),
    "CCR7hi CD40+ CD80- CD86- DC": plt.matplotlib.colors.to_rgb('deeppink'),
    "High proliferation/synthesis": plt.matplotlib.colors.to_rgb('b'),
    "Mast cells": plt.matplotlib.colors.to_rgb('cornflowerblue'),
    "Neutrophils": plt.matplotlib.colors.to_rgb('coral'),
}


In [None]:
# Translate each cell's Leiden label into its annotated cell type
leiden_labels = adata.obs['leiden']
adata.obs['cell_type'] = leiden_labels.map(annotation_dict)

In [None]:
# Annotated UMAP of all lymph node cells; scanpy writes the figure via `save`
sc.pl.umap(
    adata,
    color=['cell_type'],
    add_outline=True,
    outline_width=(0.2, 0.5),
    palette=ann_palette,
    alpha=0.7,
    s=10,
    title='Lymph node',
    legend_loc='on data',
    legend_fontsize=8,
    legend_fontweight='heavy',
    save='_LN_annotations.pdf',
)


### UMAPs of Naive/Pathogenic WT/KO samples

In [None]:
# Annotated UMAP restricted to the naive WT sample, written to file_path
file_path = '/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/SI_Fig7/LN_annotations_naive_wt.pdf'
sample_cells = adata[adata.obs['sample'] == 'wt_naive']
with plt.rc_context():  # rc changes made inside stay local to this block
    sc.pl.umap(
        sample_cells,
        color=['cell_type'],
        add_outline=True,
        outline_width=(0.2, 0.3),
        palette=ann_palette,
        alpha=0.75,
        s=12,
        title='Lymph node, Naive WT',
        legend_loc='on data',
        legend_fontsize=8,
        legend_fontweight='heavy',
        show=False,
    )
    plt.savefig(file_path, bbox_inches="tight")

In [None]:
# Annotated UMAP restricted to the naive Cd74 KO sample, written to file_path
file_path = '/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/SI_Fig7/LN_annotations_naive_ko.pdf'
sample_cells = adata[adata.obs['sample'] == 'ko_naive']
with plt.rc_context():  # rc changes made inside stay local to this block
    sc.pl.umap(
        sample_cells,
        color=['cell_type'],
        add_outline=True,
        outline_width=(0.2, 0.3),
        palette=ann_palette,
        alpha=0.75,
        s=12,
        title='Lymph node, Naive Cd74 KO',
        legend_loc='on data',
        legend_fontsize=8,
        legend_fontweight='heavy',
        show=False,
    )
    plt.savefig(file_path, bbox_inches="tight")

In [None]:
# Annotated UMAP restricted to the pathogenic WT sample, written to file_path
file_path = '/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/SI_Fig7/LN_annotations_pathogenic_wt.pdf'
sample_cells = adata[adata.obs['sample'] == 'wt_pathogenic']
with plt.rc_context():  # rc changes made inside stay local to this block
    sc.pl.umap(
        sample_cells,
        color=['cell_type'],
        add_outline=True,
        outline_width=(0.2, 0.3),
        palette=ann_palette,
        alpha=0.75,
        s=12,
        title='Lymph node, Pathogenic WT',
        legend_loc='on data',
        legend_fontsize=8,
        legend_fontweight='heavy',
        show=False,
    )
    plt.savefig(file_path, bbox_inches="tight")

In [None]:
# Annotated UMAP restricted to the pathogenic Cd74 KO sample, written to file_path
file_path = '/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/SI_Fig7/LN_annotations_pathogenic_ko.pdf'
sample_cells = adata[adata.obs['sample'] == 'ko_pathogenic']
with plt.rc_context():  # rc changes made inside stay local to this block
    sc.pl.umap(
        sample_cells,
        color=['cell_type'],
        add_outline=True,
        outline_width=(0.2, 0.3),
        palette=ann_palette,
        alpha=0.75,
        s=12,
        title='Lymph node, Pathogenic Cd74 KO',
        legend_loc='on data',
        legend_fontsize=8,
        legend_fontweight='heavy',
        show=False,
    )
    plt.savefig(file_path, bbox_inches="tight")

In [None]:
# Side-by-side annotated UMAPs of the two pathogenic samples (WT left, Cd74 KO right)
ncols = 2
nrows = 1
figsize = 8
wspace = 0.1
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, nrows * figsize))
plt.subplots_adjust(wspace=wspace)

# Plot options shared by both panels
panel_style = dict(
    show=False,
    color=['cell_type'],
    add_outline=True,
    outline_width=(0.2, 0.5),
    palette=ann_palette,
    alpha=0.7,
    s=10,
    legend_loc='on data',
    legend_fontsize=12,
    legend_fontweight='medium',
)
panel_specs = [
    ('wt_pathogenic', 'Lymph node, Pathogenic WT'),
    ('ko_pathogenic', 'Lymph node, Pathogenic Cd74 KO'),
]
for panel_ax, (sample_id, panel_title) in zip(axs, panel_specs):
    sc.pl.umap(adata[adata.obs['sample'] == sample_id], ax=panel_ax, title=[panel_title], **panel_style)

plt.tight_layout()
plt.savefig('/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/annotations/LN/umap_LN_annotations_pathogenic_wt_ko.pdf',dpi=600)
plt.show()

In [None]:
# 2x2 grid of annotated UMAPs: WT in the top row, Cd74 KO in the bottom row,
# naive on the left, pathogenic on the right
ncols = 2
nrows = 2
figsize = 8
wspace = 0.1
fig_width = ncols * figsize + figsize * wspace * (ncols - 1)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(fig_width, nrows * figsize))
plt.subplots_adjust(wspace=wspace)

# Plot options shared by all four panels
panel_style = dict(
    show=False,
    color=['cell_type'],
    add_outline=True,
    outline_width=(0.2, 0.5),
    palette=ann_palette,
    alpha=0.7,
    s=10,
    legend_loc='on data',
    legend_fontsize=12,
    legend_fontweight='medium',
)
# Listed in row-major order, matching axs.flat
panel_specs = [
    ('wt_naive', 'Lymph node, Naive WT'),
    ('wt_pathogenic', 'Lymph node, Pathogenic WT'),
    ('ko_naive', 'Lymph node, Naive Cd74 KO'),
    ('ko_pathogenic', 'Lymph node, Pathogenic Cd74 KO'),
]
for panel_ax, (sample_id, panel_title) in zip(axs.flat, panel_specs):
    sc.pl.umap(adata[adata.obs['sample'] == sample_id], ax=panel_ax, title=[panel_title], **panel_style)

plt.tight_layout()
plt.savefig('/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/annotations/LN/umap_LN_annotations_samples.pdf',dpi=600)
plt.show()


In [None]:
# Write the annotated object to disk
annotated_h5ad = "maranou_032024_LN_annotated.h5ad"
adata.write_h5ad(annotated_h5ad)

## DCs/Monocytes/Macrophages

In [None]:
# Reload the annotated lymph node data written above
annotated_h5ad = "maranou_032024_LN_annotated.h5ad"
adata = sc.read(annotated_h5ad)

In [None]:
# Dendritic cell annotations, extended with monocytes/macrophages and neutrophils
DC_subset = [
    'CCR7hi CD40+ CD80- CD86- DC',
    'CCR7hi CD40+ CD80+ CD86+ DC',
    'Lymphoid-resident cDC1',
    'cDC2',
    'pDC',
    'Undefined DC',
]
dc_mono_macro = [*DC_subset, 'Monocytes and macrophages', 'Neutrophils']

# Independent copy containing only those cells
is_selected = adata.obs['cell_type'].isin(dc_mono_macro)
adata_dc_mono_macro = adata[is_selected].copy()

In [None]:
# Annotated UMAP of the DC/monocyte/macrophage/neutrophil subset, written to file_path
file_path = '/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/SI_Fig7/LN_DCMonoMacro_annotations.pdf'
with plt.rc_context():  # rc changes made inside stay local to this block
    sc.pl.umap(
        adata_dc_mono_macro,
        color=['cell_type'],
        add_outline=True,
        outline_width=(0.2, 0.3),
        palette=ann_palette,
        alpha=0.75,
        s=30,
        title='Lymph node myeloid cells',
        legend_loc='on data',
        legend_fontsize=12,
        legend_fontweight='heavy',
        show=False,
    )
    plt.savefig(file_path, bbox_inches="tight")

## Cell proportions in samples

In [None]:
# Use seaborn's "ticks" style for the figures created from here on
sns.set_style("ticks")

In [None]:
# Cell type composition of the naive WT sample, drawn as one stacked bar.

# Number of cells per cell type
cell_counts = adata[adata.obs['sample']=='wt_naive'].obs['cell_type'].value_counts()

# Fractions of all cells in the sample
total_cells = cell_counts.sum()
cell_proportions = cell_counts / total_cells

# Ascending order: the smallest populations sit at the bottom of the stack
cell_proportions_sorted = cell_proportions.sort_values(ascending=True)

# Create the plot
fig, ax = plt.subplots(figsize=(6, 6))

# Draw each segment and its label in a single pass over the cell types
bottom = 0
for cell_type, proportion in cell_proportions_sorted.items():
    ax.bar(0, proportion, bottom=bottom, width=0.5, alpha=0.6,
           color=ann_palette[cell_type], label=cell_type)

    # Only segments above 1.5 % are tall enough for a readable count/percentage label
    if proportion > 0.015:
        count = cell_counts[cell_type]
        ax.text(0, bottom + proportion/2, f'{cell_type} {count} ({proportion:.1%})',
                ha='center', va='center', fontsize=12)

    bottom += proportion

# Customize the plot
ax.set_ylabel('Proportion of Cells')
ax.set_title('Cell Type Proportions', fontsize=16)
ax.set_xlim(-0.25, 0.25)
ax.set_xticks([])  # Remove x-axis ticks

# Add a legend
# plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

plt.tight_layout()
plt.show()

# Optionally, save the figure
# plt.savefig('cell_type_proportions_stacked.png', dpi=300, bbox_inches='tight')

In [None]:
def get_cell_proportions(adata):
    """Return the fraction of cells per cell type.

    Counts the values of ``adata.obs['cell_type']`` and divides each count by
    the sum of all counts.
    """
    type_counts = adata.obs['cell_type'].value_counts()
    return type_counts.div(type_counts.sum())

# Stacked bars of cell type proportions, one bar per sample.
# Maps sample id -> column label, in left-to-right bar order.
sample_columns = {
    'wt_naive': 'WT Naive',
    'ko_naive': 'KO Naive',
    'wt_pathogenic': 'WT Pathogenic',
    'ko_pathogenic': 'KO Pathogenic',
}
sample_proportions = {
    column: get_cell_proportions(adata[adata.obs['sample'] == sample_id])
    for sample_id, column in sample_columns.items()
}

# Union of the cell types seen in any sample; missing ones get proportion 0
all_cell_types = sorted(set().union(*(props.index for props in sample_proportions.values())))
df = pd.DataFrame({
    column: props.reindex(all_cell_types).fillna(0)
    for column, props in sample_proportions.items()
})

# Order cell types by their proportion in the first column, descending;
# the following columns only break ties
df = df.sort_values(by=df.columns.tolist(), ascending=False)

# Create the plot
fig, ax = plt.subplots(figsize=(15, 8))

# Plot the stacked bars
x = [0.1, 0.9, 1.9, 2.7]  # x-coordinates for the four bars
width = 0.7  # width of the bars

# Height already stacked below each segment (running total of the rows above it)
bottoms = df.cumsum() - df

for cell_type in df.index:
    ax.bar(x, df.loc[cell_type], bottom=bottoms.loc[cell_type], width=width, alpha=0.6,
           color=ann_palette[cell_type], label=cell_type)

# Customize the plot
ax.set_ylabel('Proportion of Cells', fontsize=14)
# ax.set_title('Cell Type Proportions Comparison', fontsize=18)
ax.set_xticks(x)
ax.set_xticklabels(['WT', 'Cd74 KO','WT', 'Cd74 KO'], fontsize=12, rotation=0, ha='center')

# Add cell type labels
for x_pos, dataset in zip(x, df.columns):
    for cell_type in df.index:
        share = df.loc[cell_type, dataset]
        if share > 0.015:  # Only label if proportion > 1.5%
            ax.text(x_pos, bottoms.loc[cell_type, dataset] + share/2,
                    f'{cell_type} {share:.1%}',
                    ha='center', va='center', fontsize=12)

# Add a legend
# plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0., fontsize=10)

# Remove top and right spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Final x-axis limits, set once after all bars are drawn
ax.set_xlim(-0.4, 3.1)
ax.text(0.25, 1.02, 'Naive', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')
ax.text(0.77, 1.02, 'Pathogenic', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')

# Save the figure
plt.savefig('/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/annotations/LN/LN_cell_type_proportions_naive_vs_pathogenic.pdf', dpi=600, bbox_inches='tight')

plt.show()



In [None]:
# Distinct cell type names present in the data, for reference when defining the subsets below
list(adata.obs['cell_type'].unique())

In [None]:
# Cell type names grouped by lineage; every name must match an annotation
# exactly, otherwise the population is silently left out of the lineage plots.
T_cell_subset = ["CD8+",'Treg','CD4+','Th cells','Heterogeneous T cells','CTLA4+ CD8+ T cells','$\\gamma \\delta$T cells','Activated CD4+','Activated CD8+']
DC_subset = ['CCR7hi CD40+ CD80- CD86- DC','CCR7hi CD40+ CD80+ CD86+ DC','Lymphoid-resident cDC1','cDC2','pDC','Undefined DC']
B_cell_subset = ["Antigen-presenting B cells","Naive B cells","Activated B cells/Early plasmablasts","Developing B cells","MZB and B-1 cells","Interferon-activated B cells"]


In [None]:
## Proportions of B cells
# Stacked bars of B cell subtype proportions, one bar per sample.

adata_subset = adata[adata.obs['cell_type'].isin(B_cell_subset)].copy()

# Maps sample id -> column label, in left-to-right bar order
sample_columns = {
    'wt_naive': 'WT Naive',
    'ko_naive': 'KO Naive',
    'wt_pathogenic': 'WT Pathogenic',
    'ko_pathogenic': 'KO Pathogenic',
}
sample_proportions = {
    column: get_cell_proportions(adata_subset[adata_subset.obs['sample'] == sample_id])
    for sample_id, column in sample_columns.items()
}

# Union of the cell types seen in any sample; missing ones get proportion 0
all_cell_types = sorted(set().union(*(props.index for props in sample_proportions.values())))
df = pd.DataFrame({
    column: props.reindex(all_cell_types).fillna(0)
    for column, props in sample_proportions.items()
})

# Order cell types by their proportion in the first column, descending;
# the following columns only break ties
df = df.sort_values(by=df.columns.tolist(), ascending=False)

# Create the plot
fig, ax = plt.subplots(figsize=(15, 8))

# Plot the stacked bars
x = [0.1, 0.9, 1.9, 2.7]  # x-coordinates for the four bars
width = 0.7  # width of the bars

# Height already stacked below each segment (running total of the rows above it)
bottoms = df.cumsum() - df

for cell_type in df.index:
    ax.bar(x, df.loc[cell_type], bottom=bottoms.loc[cell_type], width=width, alpha=0.5,
           color=ann_palette[cell_type], label=cell_type)

# Customize the plot
ax.set_ylabel('Proportion of B Cells', fontsize=14)
# ax.set_title('Cell Type Proportions Comparison', fontsize=18)
ax.set_xticks(x)
ax.set_xticklabels(['WT', 'Cd74 KO','WT', 'Cd74 KO'], fontsize=12, rotation=0, ha='center')

# Add cell type labels
for x_pos, dataset in zip(x, df.columns):
    for cell_type in df.index:
        share = df.loc[cell_type, dataset]
        if share > 0.01:  # Only label if proportion > 1%
            ax.text(x_pos, bottoms.loc[cell_type, dataset] + share/2,
                    f'{cell_type} {share:.1%}',
                    ha='center', va='center', fontsize=12)

# Add a legend
# plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0., fontsize=10)

# Remove top and right spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Final x-axis limits, set once after all bars are drawn
ax.set_xlim(-0.4, 3.1)
ax.text(0.25, 1.02, 'Naive', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')
ax.text(0.77, 1.02, 'Pathogenic', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')

# Save the figure
plt.savefig('/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/annotations/LN/LN_B_cell_type_proportions_naive_vs_pathogenic.pdf', dpi=600, bbox_inches='tight')

plt.show()





In [None]:
## Proportions of T cells
# Stacked bars of T cell subtype proportions, one bar per sample.

adata_subset = adata[adata.obs['cell_type'].isin(T_cell_subset)].copy()

# Maps sample id -> column label, in left-to-right bar order
sample_columns = {
    'wt_naive': 'WT Naive',
    'ko_naive': 'KO Naive',
    'wt_pathogenic': 'WT Pathogenic',
    'ko_pathogenic': 'KO Pathogenic',
}
sample_proportions = {
    column: get_cell_proportions(adata_subset[adata_subset.obs['sample'] == sample_id])
    for sample_id, column in sample_columns.items()
}

# Union of the cell types seen in any sample; missing ones get proportion 0
all_cell_types = sorted(set().union(*(props.index for props in sample_proportions.values())))
df = pd.DataFrame({
    column: props.reindex(all_cell_types).fillna(0)
    for column, props in sample_proportions.items()
})

# Order cell types by their proportion in the first column, descending;
# the following columns only break ties
df = df.sort_values(by=df.columns.tolist(), ascending=False)

# Create the plot
fig, ax = plt.subplots(figsize=(15, 8))

# Plot the stacked bars
x = [0.1, 0.9, 1.9, 2.7]  # x-coordinates for the four bars
width = 0.7  # width of the bars

# Height already stacked below each segment (running total of the rows above it)
bottoms = df.cumsum() - df

for cell_type in df.index:
    ax.bar(x, df.loc[cell_type], bottom=bottoms.loc[cell_type], width=width, alpha=0.5,
           color=ann_palette[cell_type], label=cell_type)

# Customize the plot
ax.set_ylabel('Proportion of T Cells', fontsize=14)
# ax.set_title('Cell Type Proportions Comparison', fontsize=18)
ax.set_xticks(x)
ax.set_xticklabels(['WT', 'Cd74 KO','WT', 'Cd74 KO'], fontsize=12, rotation=0, ha='center')

# Add cell type labels
for x_pos, dataset in zip(x, df.columns):
    for cell_type in df.index:
        share = df.loc[cell_type, dataset]
        if share > 0.01:  # Only label if proportion > 1%
            ax.text(x_pos, bottoms.loc[cell_type, dataset] + share/2,
                    f'{cell_type} {share:.1%}',
                    ha='center', va='center', fontsize=12)

# Add a legend
# plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0., fontsize=10)

# Remove top and right spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Final x-axis limits, set once after all bars are drawn
ax.set_xlim(-0.4, 3.1)
ax.text(0.25, 1.02, 'Naive', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')
ax.text(0.77, 1.02, 'Pathogenic', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')

# Save the figure
plt.savefig('/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/annotations/LN/LN_T_cell_type_proportions_naive_vs_pathogenic.pdf', dpi=600, bbox_inches='tight')

plt.show()





In [None]:
## Proportions of DCs
# Stacked bars of DC subtype proportions, one bar per sample.

adata_subset = adata[adata.obs['cell_type'].isin(DC_subset)].copy()

# Maps sample id -> column label, in left-to-right bar order
sample_columns = {
    'wt_naive': 'WT Naive',
    'ko_naive': 'KO Naive',
    'wt_pathogenic': 'WT Pathogenic',
    'ko_pathogenic': 'KO Pathogenic',
}
sample_proportions = {
    column: get_cell_proportions(adata_subset[adata_subset.obs['sample'] == sample_id])
    for sample_id, column in sample_columns.items()
}

# Union of the cell types seen in any sample; missing ones get proportion 0
all_cell_types = sorted(set().union(*(props.index for props in sample_proportions.values())))
df = pd.DataFrame({
    column: props.reindex(all_cell_types).fillna(0)
    for column, props in sample_proportions.items()
})

# Order cell types by their proportion in the first column, descending;
# the following columns only break ties
df = df.sort_values(by=df.columns.tolist(), ascending=False)

# Create the plot
fig, ax = plt.subplots(figsize=(15, 8))

# Plot the stacked bars
x = [0.1, 0.9, 1.9, 2.7]  # x-coordinates for the four bars
width = 0.7  # width of the bars

# Height already stacked below each segment (running total of the rows above it)
bottoms = df.cumsum() - df

for cell_type in df.index:
    ax.bar(x, df.loc[cell_type], bottom=bottoms.loc[cell_type], width=width, alpha=0.5,
           color=ann_palette[cell_type], label=cell_type)

# Customize the plot
ax.set_ylabel('Proportion of DCs', fontsize=14)
# ax.set_title('Cell Type Proportions Comparison', fontsize=18)
ax.set_xticks(x)
ax.set_xticklabels(['WT', 'Cd74 KO','WT', 'Cd74 KO'], fontsize=12, rotation=0, ha='center')

# Add cell type labels. Every segment is labelled; thin segments get a fixed
# offset above their lower edge instead of the segment midpoint, and their
# names are printed.
for x_pos, dataset in zip(x, df.columns):
    for cell_type in df.index:
        share = df.loc[cell_type, dataset]
        segment_bottom = bottoms.loc[cell_type, dataset]

        if share < 0.01:
            print(cell_type)
            label_y = segment_bottom + 0.02
        elif share < 0.015:
            print(cell_type)
            label_y = segment_bottom + 0.015
        else:
            label_y = segment_bottom + share/2

        ax.text(x_pos, label_y,
                f'{cell_type} {share:.1%}',
                ha='center', va='center', fontsize=12)

# Add a legend
# plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0., fontsize=10)

# Remove top and right spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Final x-axis limits, set once after all bars are drawn
ax.set_xlim(-0.4, 3.1)
ax.text(0.25, 1.02, 'Naive', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')
ax.text(0.77, 1.02, 'Pathogenic', transform=ax.transAxes, ha='center', va='bottom', fontsize=16, fontweight='bold')

# Save the figure
plt.savefig('/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/annotations/LN/LN_DC_type_proportions_naive_vs_pathogenic.pdf', dpi=600, bbox_inches='tight')

plt.show()





In [None]:
## WT vs Cd74 KO

def get_cell_proportions(adata):
    """Return the fraction of cells per ``cell_type`` label.

    Counts the values of ``adata.obs['cell_type']`` and divides each count
    by the total number of counted cells, so the result sums to 1.
    """
    per_type = adata.obs['cell_type'].value_counts()
    return per_type.div(per_type.sum())

# One view per sample (genotype x condition) and its cell-type proportions
adata1 = adata[adata.obs['sample'] == 'wt_naive']
adata2 = adata[adata.obs['sample'] == 'wt_pathogenic']
adata3 = adata[adata.obs['sample'] == 'ko_naive']
adata4 = adata[adata.obs['sample'] == 'ko_pathogenic']

cell_proportions1 = get_cell_proportions(adata1)
cell_proportions2 = get_cell_proportions(adata2)
cell_proportions3 = get_cell_proportions(adata3)
cell_proportions4 = get_cell_proportions(adata4)

# Union of the cell types observed in any of the four samples
all_cell_types = sorted(set(cell_proportions1.index) | set(cell_proportions2.index) |
                        set(cell_proportions3.index) | set(cell_proportions4.index))

# One column per sample; a cell type absent from a sample gets proportion 0
df = pd.DataFrame({
    'WT Naive': cell_proportions1.reindex(all_cell_types).fillna(0),
    'WT Pathogenic': cell_proportions2.reindex(all_cell_types).fillna(0),
    'KO Naive': cell_proportions3.reindex(all_cell_types).fillna(0),
    'KO Pathogenic': cell_proportions4.reindex(all_cell_types).fillna(0)
})

# Order rows by descending proportion in the first column ('WT Naive');
# the remaining columns are only used to break ties.
df = df.sort_values(by=df.columns.tolist(), ascending=False)

# Height at which each segment starts = sum of the rows stacked below it.
# Computed once here and shared by the bars and their labels.
bar_bottoms = df.cumsum() - df

# Create the plot
fig, ax = plt.subplots(figsize=(15, 8))

x = [0.1, 0.9, 1.9, 2.7]  # x-coordinates for the four bars
width = 0.7  # width of the bars

# One ax.bar call per cell type draws that type's segment in all four bars.
# The label is always set so that a legend works if it is re-enabled below.
for cell_type in df.index:
    ax.bar(x, df.loc[cell_type], bottom=bar_bottoms.loc[cell_type], width=width, alpha=0.7,
           color=ann_palette[cell_type], label=cell_type)

# Customize the plot
ax.set_ylabel('Proportion of Cells', fontsize=14)
# ax.set_title('Cell Type Proportions Comparison', fontsize=18)
ax.set_xticks(x)
ax.set_xticklabels(['Naive', 'Pathogenic', 'Naive', 'Pathogenic'], fontsize=12, rotation=0, ha='center')

# Label each segment at its vertical centre
for i, dataset in enumerate(df.columns):
    for cell_type in df.index:
        frac = df.loc[cell_type, dataset]
        if frac > 0.015:  # segments of 1.5% or less are too thin for a label
            ax.text(x[i], bar_bottoms.loc[cell_type, dataset] + frac / 2,
                    f'{cell_type} {frac:.1%}',
                    ha='center', va='center', fontsize=12)

# Add a legend
# plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0., fontsize=10)

# Remove top and right spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Genotype headings above each pair of bars
ax.text(0.25, 1.02, 'WT', transform=ax.transAxes, ha='center', va='bottom', fontsize=14, fontweight='bold')
ax.text(0.77, 1.02, 'Cd74 KO', transform=ax.transAxes, ha='center', va='bottom', fontsize=14, fontweight='bold')
ax.set_xlim(-0.4, 3.1)  # Adjust x-axis limits

plt.savefig('/Users/oipulk/Documents/scRNASeq/data/Eleftheria_Maranou_Mar2024/analysis/figures/annotations/LN/LN_cell_type_proportions_wt_vs_ko.pdf', dpi=600, bbox_inches='tight')

plt.show()




## DC marker expression in DC clusters

In [None]:
### Marker panels for the DC dotplot, one list per group of markers
common_cdc_markers = ['Flt3', 'Zbtb46', 'Kit']
cdc1_markers = [
    'Irf8', 'Batf3', 'Nfil3', 'Id2', 'Bcl6', 'Xcr1',
    'Rab43', 'Itgax', 'Itgae', 'Cd24a', 'Cd8a',
]
cdc2_markers = ['Relb', 'Esam', 'Itgam', 'Irf4', 'Sirpa']
additional_cdc_markers = ['Clec9a', 'Ccr2', 'Ccr7']
dc_activation_markers = ['Cd40', 'Cd80', 'Cd86']
pDC_markers = [
    'Ptprc', 'Bst2', 'Siglech', 'Tcf4', 'Zeb2',
    'Irf7', 'Pacsin1', 'Spib', 'Bcl11b',
]

# All panels merged into one sorted array (used as var_names for the dotplot)
all_dc_markers = np.sort([
    *common_cdc_markers,
    *cdc1_markers,
    *cdc2_markers,
    *additional_cdc_markers,
    *dc_activation_markers,
    *pDC_markers,
])

In [None]:
# Independent copy holding only the cells whose annotation is in DC_subset
_is_dc = adata.obs['cell_type'].isin(DC_subset)
adata_dc_hr = adata[_is_dc].copy()

# Dendrogram over the DC cell types, then the marker dotplot (saved as PDF)
sc.tl.dendrogram(adata_dc_hr, groupby='cell_type')
# sc.pl.dendrogram(adata_dc_lr, groupby='cell_type_low_res')
sc.pl.dotplot(
    adata_dc_hr,
    var_names=all_dc_markers,
    groupby='cell_type',
    cmap='Blues',
    save='LN_dc_markers_annotations.pdf',
)


In [None]:
# UMAP of the DC subset coloured by annotation, labels drawn on the plot
sc.pl.umap(
    adata_dc_hr,
    color=['cell_type'],
    palette=ann_palette,
    add_outline=True,
    outline_width=(0.2, 0.5),
    alpha=0.7,
    s=10,
    title='Lymph node',
    legend_loc='on data',
    legend_fontsize=8,
    legend_fontweight='heavy',
    save='_LN_DC_annotations.pdf',
)


## Markers used in annotation

## T cell markers

In [None]:
# Alternative CD8 gene sets tried earlier:
#   ['Cd3e', 'Cd160', 'Cd8a', 'Pdcd1']
#   ['Cd3e', 'Cd8a', 'Gzma', 'Prf1']   (TM tissue specific markers)

# Gene sets per T cell population
cd8_genes = ['Cd3e', 'Cd8a']                                      # Cd8+ T cells
cd4_genes = ['Cd3e', 'Cd4']                                       # Cd4+ T cells
Treg_genes = ['Cd4', 'Foxp3', 'Ctla4']
Th_genes = ['Cxcr6', 'Bcl6', 'Prdm1', 'Tbx21']
cd8effector_genes = ['Cd8a', 'Cx3cr1', 'Gzmb', 'Lgals1', 'S1pr5']
TRM_genes = ['Itgae', 'Cd69']

# Each score is the per-cell sum of adata.X over the genes of one set
_t_cell_sets = {
    'cd8_score': cd8_genes,
    'cd4_score': cd4_genes,
    'Treg_score': Treg_genes,
    'Th_score': Th_genes,
    'cd8effector_score': cd8effector_genes,
    'TRM_score': TRM_genes,
}
for _score_name, _gene_set in _t_cell_sets.items():
    adata.obs[_score_name] = adata[:, _gene_set].X.sum(1)

sc.pl.umap(
    adata,
    color=['cd8_score', 'cd4_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=4,
)



In [None]:
# Treg and Th scores side by side, colour scale capped at 4
_treg_th_scores = ['Treg_score', 'Th_score']
sc.pl.umap(
    adata,
    color=_treg_th_scores,
    palette='tab20',
    cmap='coolwarm',
    vmax=4,
)

In [None]:
# 2x2 grid: Cd3e expression on the UMAP, one panel per sample
ncols = 2
nrows = 2
figsize = 8
wspace = 0.1
_grid_width = ncols * figsize + figsize * wspace * (ncols - 1)
_grid_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(_grid_width, _grid_height))
plt.subplots_adjust(wspace=wspace)

# Panels are filled row by row; only the last call keeps scanpy's default
# `show` (None) so the figure is rendered once all four panels are drawn.
_samples = ['wt_naive', 'wt_pathogenic', 'ko_naive', 'ko_pathogenic']
for _idx, (_sample, _panel) in enumerate(zip(_samples, axs.flat)):
    sc.pl.umap(
        adata[adata.obs['sample'] == _sample],
        color=['Cd3e'],
        palette='tab20',
        cmap='coolwarm',
        vmax=4,
        ax=_panel,
        show=False if _idx < len(_samples) - 1 else None,
    )


In [None]:
# 2x2 grid: Cd8a expression on the UMAP, one panel per sample
ncols = 2
nrows = 2
figsize = 8
wspace = 0.1
_grid_width = ncols * figsize + figsize * wspace * (ncols - 1)
_grid_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(_grid_width, _grid_height))
plt.subplots_adjust(wspace=wspace)

# Panels are filled row by row; only the last call keeps scanpy's default
# `show` (None) so the figure is rendered once all four panels are drawn.
_samples = ['wt_naive', 'wt_pathogenic', 'ko_naive', 'ko_pathogenic']
for _idx, (_sample, _panel) in enumerate(zip(_samples, axs.flat)):
    sc.pl.umap(
        adata[adata.obs['sample'] == _sample],
        color=['Cd8a'],
        palette='tab20',
        cmap='coolwarm',
        vmax=3,
        ax=_panel,
        show=False if _idx < len(_samples) - 1 else None,
    )


In [None]:
# 2x2 grid: Cd4 expression on the UMAP, one panel per sample
ncols = 2
nrows = 2
figsize = 8
wspace = 0.1
_grid_width = ncols * figsize + figsize * wspace * (ncols - 1)
_grid_height = nrows * figsize
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(_grid_width, _grid_height))
plt.subplots_adjust(wspace=wspace)

# Panels are filled row by row; only the last call keeps scanpy's default
# `show` (None) so the figure is rendered once all four panels are drawn.
_samples = ['wt_naive', 'wt_pathogenic', 'ko_naive', 'ko_pathogenic']
for _idx, (_sample, _panel) in enumerate(zip(_samples, axs.flat)):
    sc.pl.umap(
        adata[adata.obs['sample'] == _sample],
        color=['Cd4'],
        palette='tab20',
        cmap='coolwarm',
        vmax=3,
        ax=_panel,
        show=False if _idx < len(_samples) - 1 else None,
    )


In [None]:
# Cd8a next to Ctla4: used to look for CTLA4+ CD8+ T cells
_cd8_ctla4 = ['Cd8a', 'Ctla4']
sc.pl.umap(
    adata,
    color=_cd8_ctla4,
    palette='tab20',
    cmap='coolwarm',
    vmax=2,
)

In [None]:
def _scaled_expression(gene):
    """Return the per-cell value of *gene* divided by its maximum over all cells.

    The result is flattened to a 1-D array so it can be assigned directly to
    an ``adata.obs`` column regardless of whether ``X.sum(1)`` yields a
    matrix or a plain array.
    """
    per_cell = np.asarray(adata[:, gene].X.sum(1)).ravel()
    return per_cell / np.max(per_cell)


# Slice each gene once and reuse it in all three scores
_cd3e = _scaled_expression('Cd3e')
_cd8a = _scaled_expression('Cd8a')
_cd4 = _scaled_expression('Cd4')

# CD3E+ CD8A- CD4-
adata.obs['DNTscore'] = (1 - _cd8a) * (1 - _cd4) * _cd3e

# CD8A+ CD4+
# NOTE(review): the original comment read "CD3E+ CD8A+ CD4+", but Cd3e has
# never been part of this product. Values are kept as they were; confirm
# whether a Cd3e factor is intended.
adata.obs['DPTscore'] = _cd8a * _cd4

# CD3E- CD8A- CD4-
adata.obs['TNTscore'] = (1 - _cd8a) * (1 - _cd4) * (1 - _cd3e)

# DNT and TNT scores side by side
ncols = 2
nrows = 1
figsize = 8
wspace = 0.1
fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * figsize + figsize * wspace * (ncols - 1), nrows * figsize),
)
sc.pl.umap(adata, color=['DNTscore'], palette='tab20', cmap='coolwarm', ax=axs[0], vmax=1, show=False)
sc.pl.umap(adata, color=['TNTscore'], palette='tab20', cmap='coolwarm', ax=axs[1], vmax=1)


In [None]:
# Pdcd1 expression with a low colour cap
sc.pl.umap(
    adata,
    color=['Pdcd1'],
    palette='tab20',
    cmap='coolwarm',
    vmax=0.8,
)


In [None]:
# Ncam1 and Fcgr3 (Fcgr3 is the gene name for CD16)
_ncam_cd16 = ['Ncam1', 'Fcgr3']
sc.pl.umap(
    adata,
    color=_ncam_cd16,
    palette='tab20',
    cmap='coolwarm',
    vmax=1,
)


In [None]:
# Sell is alias for CD62L (naive/early memory T cell marker for central memory T cells)
memory_genes = ['Cd44', 'Sell', 'Il7r']
Trm_genes = ['Cd44', 'Sell', 'Il7r', 'Cd69', 'Itgae']

# Per-cell sum of adata.X over each gene set
for _score_name, _gene_set in (('memory_score', memory_genes), ('Trm_score', Trm_genes)):
    adata.obs[_score_name] = adata[:, _gene_set].X.sum(1)

sc.pl.umap(
    adata,
    color=['memory_score', 'Trm_score'],
    palette='tab20',
    cmap='coolwarm',
)


In [None]:
# Tcm markers: Sell and Ccr7
_tcm_markers = ['Sell', 'Ccr7']
sc.pl.umap(
    adata,
    color=_tcm_markers,
    palette='tab20',
    cmap='coolwarm',
    vmax=3,
)

### NK cells

In [None]:
# NK score: per-cell sum of adata.X over the NK gene set
nk_genes = ['Klrb1c', 'Ncr1', 'Fcgr3', 'Itgal']
_nk_view = adata[:, nk_genes]
adata.obs['nk_score'] = _nk_view.X.sum(1)
sc.pl.umap(
    adata,
    color=['nk_score'],
    palette='tab20',
    cmap='coolwarm',
)

### Naive B

In [None]:
# List every gene in the dataset whose name begins with 'Pdcd1'
_starts_with_pdcd1 = adata.var_names.str.startswith('Pdcd1')
adata.var_names[_starts_with_pdcd1]

In [None]:
# Naive and memory B cell gene sets
naiveB_genes = ['Ighd', 'Ighm']
memoryB_genes = ['Itgam', 'Cd80', 'Cxcr3', 'Nt5e', 'Pdcd1lg2']

# Per-cell sum of adata.X over each gene set
for _score_name, _gene_set in (('naiveB_score', naiveB_genes), ('memoryB_score', memoryB_genes)):
    adata.obs[_score_name] = adata[:, _gene_set].X.sum(1)

sc.pl.umap(
    adata,
    color=['naiveB_score', 'memoryB_score'],
    palette='tab20',
    cmap='coolwarm',
)


### Age-associated B

In [None]:
# Age-associated B cell gene set.
# 'Tbx21' (T-bet) replaces the earlier 'Tbx2' entry: T-bet is the
# transcription factor used for this population and the symbol used for it
# elsewhere in this notebook.
# NOTE(review): 'Tbx2' looked like a typo - confirm it was not intentional.
ageB_genes = ['Ighm', 'Il10', 'Ifng', 'Itgax', 'Tbx21', 'Itgam', 'Fas']

# Per-cell sum of adata.X over the gene set
adata.obs['ageB_score'] = adata[:, ageB_genes].X.sum(1)
sc.pl.umap(adata, color=['ageB_score'], palette='tab20', cmap='coolwarm', vmax=5)


### Marginal zone B cells

In [None]:
# Marginal zone B cell markers: the summed score is stored in obs,
# while the plot shows the individual genes
mzB_genes = ['Cd9', 'Cr2', 'Spib']

_mzb_view = adata[:, mzB_genes]
adata.obs['mzB_score'] = _mzb_view.X.sum(1)
sc.pl.umap(
    adata,
    color=mzB_genes,
    palette='tab20',
    cmap='coolwarm',
    vmax=2,
)

### B-1

In [None]:
# B-1 cell markers, one panel per gene
B1_genes = ['Cd5', 'Spn', 'Ptpn22']
sc.pl.umap(
    adata,
    color=B1_genes,
    palette='tab20',
    cmap='coolwarm',
    vmax=1,
)

### Common innate-like B cell markers

In [None]:
# Markers shared by innate-like B cells, one panel per gene
innate_genes = ['Cd19', 'Cd24a', 'Cd38']
sc.pl.umap(
    adata,
    color=innate_genes,
    palette='tab20',
    cmap='coolwarm',
    vmax=2,
)

### DC

In [None]:
# Gene ref: Saito 2022, The role of type-2 conventional dendritic cells in the regulation of tumor immunity
dc1_saito = ['Xcr1', 'Itgax', 'Clec9a', 'Cd8a', 'Ly75']
dc1_act = ['Clec9a', 'Itgax']
dc1_shi_etal = ['Xcr1', 'Tlr3', 'Clec9a', 'Cadm1']

# Mouse pDC's express CD11c (Itgax) at low level. cDC2 express Itgax, Itgam and Sirpa
# Shi et al state that Sirpa is expressed in humans, and Cd11b (Itgam) in mice. We do observe Sirpa
dc2_saito = ['Itgax', 'Itgam', 'Sirpa']
dc2_act = ['Fcgr1', 'Itgam', 'Sirpa']
dc2_combined = dc2_saito + dc2_act

# Score every signature on the full object first.
_dc_signatures = {
    'dc1_saito_score': dc1_saito,
    'dc1_act_score': dc1_act,
    'dc1_shi_etal_score': dc1_shi_etal,
    'dc2_saito_score': dc2_saito,
    'dc2_act_score': dc2_act,
    'dc2_combined_score': dc2_combined,
}
for _score_name, _gene_set in _dc_signatures.items():
    sc.tl.score_genes(adata, _gene_set, score_name=_score_name)

# Take the subset only AFTER scoring: score_genes writes its result into
# adata.obs, and a copy made beforehand would not contain the score columns
# that are plotted below.
adata_subset = adata[adata.obs['leiden'].isin(['r0','r1','r2','r3','r4','r5','r6','r7','r8','r9','20','21'])].copy()

sc.pl.umap(adata_subset, color=['dc1_shi_etal_score', 'dc1_act_score'], palette='tab20', cmap='coolwarm', vmax=1.5)
sc.pl.umap(adata_subset, color=['dc2_saito_score', 'dc2_act_score'], palette='tab20', cmap='coolwarm', vmax=1.0)

In [None]:
# Marker pairs on the DC1 subset, one figure per pair.
# NOTE(review): adata_DC1_subset is not created in this cell; within the
# visible part of the notebook it is only assigned in a later cell -
# confirm the intended execution order.
_dc1_marker_pairs = [
    ['Irf8', 'Irf4'],
    ['Xcr1', 'Itgax'],
    ['Clec9a', 'Cd24a'],
    ['Klf4', 'Tbx21'],
    ['Relb', 'Tbx21'],
]
for _pair in _dc1_marker_pairs:
    sc.pl.umap(adata_DC1_subset, color=_pair, palette='tab20', cmap='coolwarm', vmin=0)


In [None]:
# Cd8a, Sirpa and Xcr1 on the DC1 subset
sc.pl.umap(
    adata_DC1_subset,
    color=['Cd8a', 'Sirpa', 'Xcr1'],
    palette='tab20',
    cmap='coolwarm',
    vmin=0,
)


In [None]:
# Activation markers and Ccr7 on the DC1 subset, one figure per pair
for _pair in (['Cd80', 'Cd86'], ['Cd40', 'Ccr7']):
    sc.pl.umap(
        adata_DC1_subset,
        color=_pair,
        palette='tab20',
        cmap='coolwarm',
        vmin=0,
    )

# ['Ccr7','Itgae', 'Cd207', 'Cx3cr1', 'Itgax', 'H2-Ab1', 'Cd40', 'Cd80', 'Cd86', 'Relb', 'Zbtb46']

In [None]:
# More detailed comparison between DC1 subsets:
# Wilcoxon test of each listed leiden cluster against the rest of the cells
_dc1_clusters = ['r1', 'r2', 'r3', 'r4']
sc.tl.rank_genes_groups(
    adata,
    'leiden',
    groups=_dc1_clusters,
    reference='rest',
    method='wilcoxon',
)
sc.pl.rank_genes_groups(adata, n_genes=20, sharey=False)

# Collect the ranking results into a DataFrame (no single group selected)
de_markers = sc.get.rank_genes_groups_df(adata, None)


In [None]:
# Check markers in DC1: The DEGs can be used to separate subgroups. We will use Cst3 (Cystatin C, CyC)

# Ref: The protease inhibitor cystatin C is differentially expressed among dendritic cell populations, but does not control antigen presentation
# El-Sukkari et al., J Immunol. 2003 Nov 15;171(10):5003-11.  doi: 10.4049/jimmunol.171.10.5003.

# Independent copy restricted to leiden clusters r1-r8
_dc1_cluster_ids = ['r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8']
adata_DC1_subset = adata[adata.obs['leiden'].isin(_dc1_cluster_ids)].copy()

for _genes in (['Cd8a', 'Cd4', 'Itgax', 'Wdfy4'], ['Ccr2', 'Cst3', 'Xcr1', 'Sirpa']):
    sc.pl.umap(adata_DC1_subset, color=_genes, palette='tab20', cmap='coolwarm', vmin=0)


In [None]:
# Check markers in DC1, split by genotype: The DEGs can be used to separate subgroups. We will use Cst3 (Cystatin C, CyC)

# Ref: The protease inhibitor cystatin C is differentially expressed among dendritic cell populations, but does not control antigen presentation
# El-Sukkari et al., J Immunol. 2003 Nov 15;171(10):5003-11.  doi: 10.4049/jimmunol.171.10.5003.

# Independent copy restricted to leiden clusters r1-r8 plus cluster 20
adata_DC1_dsubset = adata[adata.obs['leiden'].isin(['r1','r2','r3','r4','r5','r6','r7','r8','20'])].copy()

# For each gene panel, plot WT cells first and KO cells second.
# (A stray `'Cd40','Ccr7'` expression that used to end this cell only echoed
# a tuple as cell output and has been dropped.)
_gene_panels = (
    ['Cd8a', 'Cd4', 'Sirpa', 'Itgax'],
    ['Ccr2', 'Cst3', 'Xcr1', 'Cd74'],
)
for _genes in _gene_panels:
    for _genotype in ('wt', 'ko'):
        sc.pl.umap(
            adata_DC1_dsubset[adata_DC1_dsubset.obs['WT/KO'] == _genotype],
            color=_genes, palette='tab20', cmap='coolwarm', vmin=0,
        )

In [None]:
# Independent copy restricted to leiden clusters r1-r8 plus cluster 20
_dc1_d_clusters = ['r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8', '20']
adata_DC1_dsubset = adata[adata.obs['leiden'].isin(_dc1_d_clusters)].copy()

# For each gene panel, plot naive cells first and pathogenic cells second
for _genes in (['Ccr2', 'Ccr7'], ['Cd80', 'Cd86', 'Cd40']):
    for _condition in ('naive', 'pathogenic'):
        _cells = adata_DC1_dsubset[adata_DC1_dsubset.obs['pathogenicity'] == _condition]
        sc.pl.umap(_cells, color=_genes, palette='tab20', cmap='coolwarm', vmin=0)


In [None]:
### Beutler lab study Ccr2+ cDC2 cells.
_ccr2_wdfy4 = ['Ccr2', 'Wdfy4']
sc.pl.umap(
    adata_subset,
    color=_ccr2_wdfy4,
    palette='tab20',
    cmap='coolwarm',
    vmax=3,
)

In [None]:
# Let's check monocyte-derived DC markers (Itgam=Cd11b, Fcgr1=Cd64, Csf1r = Cd115, Adgre1 = F4/80)
# Elodie Segura, Alice Coillard. Antigen presentation by mouse monocyte-derived cells: Re-evaluating
# the concept of monocyte-derived dendritic cells. Molecular Immunology, 2021, 135, pp.165-169.
# 10.1016/j.molimm.2021.04.012. inserm-03381917

modc = ['Ccr2', 'Itgam', 'Fcgr1', 'Csf1r', 'Adgre1']

sc.tl.score_genes(adata, modc, score_name='modc_score')

# Re-take the subset after scoring: score_genes writes 'modc_score' into
# adata.obs, so a copy made before this point does not contain the column.
adata_subset = adata[adata.obs['leiden'].isin(['r0','r1','r2','r3','r4','r5','r6','r7','r8','r9','20','21'])].copy()

sc.pl.umap(adata_subset, color=['modc_score'], palette='tab20', cmap='coolwarm', vmax=2)

In [None]:
# Marker pairs on the DC/myeloid subset, one figure per pair
_marker_pairs = (
    ['Bst2', 'Tcf4'],
    ['Zeb2', 'Irf7'],
    ['Pacsin1', 'Spib'],
)
for _pair in _marker_pairs:
    sc.pl.umap(adata_subset, color=_pair, palette='tab20', cmap='coolwarm')


In [None]:
# pDC score: per-cell sum of adata.X over the three genes
pdc_saito = ['Ptprc', 'Bst2', 'Siglech']

_pdc_view = adata[:, pdc_saito]
adata.obs['pdc_score_saito'] = _pdc_view.X.sum(1)
sc.pl.umap(
    adata,
    color=['pdc_score_saito'],
    palette='tab20',
    cmap='coolwarm',
)


In [None]:
# Migratory DC signature: score on the full object, then copy the clusters
# of interest so the copy carries the new score column
migratory_dc_act = ['Arc', 'Ccr7', 'Irf4']
sc.tl.score_genes(adata, migratory_dc_act, score_name='migratory_dc_act_score')

_subset_clusters = ['r0', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8', 'r9', '20', '21']
adata_subset = adata[adata.obs['leiden'].isin(_subset_clusters)].copy()

# First the combined score, then the three genes individually
for _colour_by in (['migratory_dc_act_score'], ['Arc', 'Ccr7', 'Irf4']):
    sc.pl.umap(adata_subset, color=_colour_by, palette='tab20', cmap='coolwarm', vmax=1.2)

In [None]:
# Additional marker pairs on the full dataset, one figure per pair.
# NOTE(review): 'Bcl119' does not look like a standard gene symbol
# (possibly meant 'Bcl11a' or 'Bcl11b') - verify it exists in adata.var_names.
for _pair in (['Bst2', 'Klk1'], ['Cmah', 'Bcl119']):
    sc.pl.umap(
        adata,
        color=_pair,
        palette='tab20',
        cmap='coolwarm',
    )


In [None]:
# DC1 score: per-cell sum over the DC1 gene set
# (shorter alternative tried earlier: ['Cd8a','Irf8'])
dc1_genes = ['Cd8a', 'Irf8', 'Batf3', 'Nfil3', 'Id2', 'Bcl6', 'Xcr1', 'Rab43', 'Itgax', 'Itgae', 'Cd24a']
adata.obs['dc1_score'] = adata[:, dc1_genes].X.sum(1)

# DC2 score: gene-set sum scaled by its maximum, multiplied by
# (1 - Cd8a scaled by its maximum), so cells with high Cd8a score lower
# (alternative set tried earlier: ['Relb', 'Esam', 'Itgam','Irf4', 'Sirpa'])
dc2_genes = ['H2-Ab1', 'Sirpa', 'Cd4', 'Notch2', 'Clec4a2', 'Esam', 'Irf4', 'Relb', 'Zeb2', 'Klf4']
_dc2_sum = adata[:, dc2_genes].X.sum(1)
_cd8a_sum = adata[:, 'Cd8a'].X.sum(1)
_dc2_scaled = np.asarray(_dc2_sum / np.max(_dc2_sum))
_cd8a_absent = np.asarray(1 - _cd8a_sum / np.max(_cd8a_sum))
adata.obs['dc2_score'] = _dc2_scaled * _cd8a_absent

# pDC and migratory DC scores: per-cell sums over their gene sets
pdc_genes = ['Cd209a', 'Lifr', 'Tcf4', 'Zeb2', 'Pacsin1', 'Spib']
adata.obs['pdc_score'] = adata[:, pdc_genes].X.sum(1)

migratorydc_genes = ['Ccr7', 'Itgae', 'Cd207', 'Cx3cr1', 'Itgax', 'H2-Ab1', 'Cd40', 'Cd80', 'Cd86', 'Relb', 'Zbtb46']
adata.obs['migratorydc_score'] = adata[:, migratorydc_genes].X.sum(1)

# One figure per score; only dc2 and pdc use a colour cap
_score_plots = (
    ('dc1_score', {}),
    ('dc2_score', {'vmax': 0.9}),
    ('pdc_score', {'vmax': 10}),
    ('migratorydc_score', {}),
)
for _score_name, _extra in _score_plots:
    sc.pl.umap(adata, color=[_score_name], palette='tab20', cmap='coolwarm', **_extra)



In [None]:
## Division to Cross-presenting and Sirpa+ DC
## See Gurka et al, Front. Immunol., 04 February 2015 Sec. Antigen Presenting Cell Biology
## Volume 6 - 2015 | https://doi.org/10.3389/fimmu.2015.00035

cdc1_genes1 = ['Cd8a', 'Clec9a', 'Batf3']
cdc1_genes2 = ['Xcr1', 'Sirpa']

# One figure per gene list
for _genes in (cdc1_genes1, cdc1_genes2):
    sc.pl.umap(adata, color=_genes, palette='tab20', cmap='coolwarm')


In [None]:
### Activated cDCs
acdc_genes1 = ['Itgam', 'Itgax']
acdc_genes2 = ['Cd80', 'Cd86']

# One figure per gene list
for _genes in (acdc_genes1, acdc_genes2):
    sc.pl.umap(adata, color=_genes, palette='tab20', cmap='coolwarm')

In [None]:
### Monocyte-derived DC
mddc_genes1 = ['Itgam', 'Itgax']
mddc_genes2 = ['Il12a', 'Ly6g']

# One figure per gene list
for _genes in (mddc_genes1, mddc_genes2):
    sc.pl.umap(adata, color=_genes, palette='tab20', cmap='coolwarm')


In [None]:
# pDC markers separately, one figure per pair
for _pair in (['Irf8', 'Tcf4'], ['Zbtb46', 'Siglech']):
    sc.pl.umap(
        adata,
        color=_pair,
        palette='tab20',
        cmap='coolwarm',
    )

In [None]:
# Itgae next to Xcr1 on the full dataset
_itgae_xcr1 = ['Itgae', 'Xcr1']
sc.pl.umap(
    adata,
    color=_itgae_xcr1,
    palette='tab20',
    cmap='coolwarm',
)


### Monocytes

In [None]:
# Gene sets for the two monocyte scores
clmono_genes = ['Ccl9', 'Ccr2', 'Cd68', 'Ly6c2']
nclmono_genes = ['Csf1r', 'Cx3cr1', 'Fabp4']

# Per-cell sum of adata.X over each gene set
for _score_name, _gene_set in (('clmono_score', clmono_genes), ('nclmono_score', nclmono_genes)):
    adata.obs[_score_name] = adata[:, _gene_set].X.sum(1)

sc.pl.umap(
    adata,
    color=['clmono_score', 'nclmono_score'],
    palette='tab20',
    cmap='coolwarm',
)

### Macrophages

In [None]:
# Macrophage gene set taken from PanglaoDB.
# Alternatives tried earlier:
#   ['Adgre1','Itgam','Ly6g']
#   ['Adgre1','Itgam','Apoe','C1qa','Cx3cr1']
macroph_genes = ['Cd68', 'Fcgr1', 'Naaa', 'Lyz2', 'Ccl12']

# Per-cell sum of adata.X over the gene set
_macroph_view = adata[:, macroph_genes]
adata.obs['macroph_score'] = _macroph_view.X.sum(1)

sc.pl.umap(
    adata,
    color=['macroph_score'],
    palette='tab20',
    cmap='coolwarm',
)


### Plasma cells

In [None]:
# Mzb1 next to Ighg1 on the full dataset
_mzb1_ighg1 = ['Mzb1', 'Ighg1']
sc.pl.umap(
    adata,
    color=_mzb1_ighg1,
    palette='tab20',
    cmap='coolwarm',
)


In [None]:
# Plasma cell score: per-cell sum of adata.X over the two genes
plasma_genes = ['Jchain', 'Sdc1']
_plasma_view = adata[:, plasma_genes]
adata.obs['plasma_score'] = _plasma_view.X.sum(1)
sc.pl.umap(
    adata,
    color=['plasma_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=1.,
)


In [None]:
# Ltc4s next to Batf3 on the full dataset
_ltc4s_batf3 = ['Ltc4s', 'Batf3']
sc.pl.umap(
    adata,
    color=_ltc4s_batf3,
    palette='tab20',
    cmap='coolwarm',
    vmax=4,
)

### Mast cells

In [None]:
# Mast cell score: per-cell sum of adata.X over the gene set
mast_genes = ['Kit', 'Ltc4s', 'Il1rl1', 'Hdc']
_mast_view = adata[:, mast_genes]
adata.obs['mast_score'] = _mast_view.X.sum(1)
sc.pl.umap(
    adata,
    color=['mast_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=4,
)

In [None]:
# Flag the cells whose summed mast-gene value lies above the 99.9th percentile.
# The per-cell sum is computed once and flattened to 1-D, so this works
# whether X.sum(1) returns a 2-D matrix or a plain 1-D array.
_mast_total = np.asarray(adata[:, mast_genes].X.sum(1)).ravel()
_mast_cutoff = np.percentile(_mast_total, 99.9)

# Stored as the strings 'True'/'False' and converted to a categorical column
adata.obs['hi_mast_score'] = (_mast_total > _mast_cutoff).astype(str)
adata.obs['hi_mast_score'] = adata.obs['hi_mast_score'].astype('category')
sc.pl.umap(adata, color=['hi_mast_score'])

### Neutrophils

In [None]:
# Neutrophil markers, one figure per gene pair
neutrophil_genes1 = ['Csf3r', 'Ly6g']
neutrophil_genes2 = ['S100a8', 'Il1r2']
for _genes in (neutrophil_genes1, neutrophil_genes2):
    sc.pl.umap(adata, color=_genes, palette='tab20', cmap='coolwarm')

### Basophils

In [None]:
# Basophil score: per-cell sum of adata.X over the gene set
basophil_genes = ['Fcer1a', 'Cpa3', 'Ms4a2', 'Gata2', 'Il3ra', 'Ccr3', 'Hdc']
_basophil_view = adata[:, basophil_genes]
adata.obs['basophil_score'] = _basophil_view.X.sum(1)
sc.pl.umap(
    adata,
    color=['basophil_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=4,
)

### Progenitor cells

In [None]:
# Progenitor score: per-cell sum of adata.X over the gene set
progenitor_genes = ['Cxcr5', 'Id3', 'Slamf6', 'Tcf7']
_progenitor_view = adata[:, progenitor_genes]
adata.obs['progenitor_score'] = _progenitor_view.X.sum(1)
sc.pl.umap(
    adata,
    color=['progenitor_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=5,
)

In [None]:
# ILC2 score: per-cell sum of adata.X over the gene set
ILC2_genes = ['Il1rl1', 'Gata3', 'Icos']
_ilc2_view = adata[:, ILC2_genes]
adata.obs['ILC2_score'] = _ilc2_view.X.sum(1)
sc.pl.umap(
    adata,
    color=['ILC2_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=5,
)

### NKT cells

In [None]:
# Zbtb16 next to Klrb1c on the full dataset
_zbtb16_klrb1c = ['Zbtb16', 'Klrb1c']
sc.pl.umap(
    adata,
    color=_zbtb16_klrb1c,
    palette='tab20',
    cmap='coolwarm',
    vmax=2,
)

In [None]:
# NKT markers: the summed score is stored in obs,
# while the plot shows the individual genes
NKT_genes = ['Ncam1', 'Gata3', 'Il2rb']
_nkt_view = adata[:, NKT_genes]
adata.obs['NKT_score'] = _nkt_view.X.sum(1)
sc.pl.umap(
    adata,
    color=NKT_genes,
    palette='tab20',
    cmap='coolwarm',
    vmax=1,
)

In [None]:
# Invariant alpha chain genes for NKT cells, followed by Zbtb16 / Tbx21.
# Each entry is (genes to plot, colour cap).
_nkt_panels = (
    (['Trav11', 'Traj18'], 0.5),
    (['Zbtb16', 'Tbx21'], 2),
)
for _genes, _cap in _nkt_panels:
    sc.pl.umap(adata, color=_genes, palette='tab20', cmap='coolwarm', vmax=_cap)

In [None]:
# common NKT β chain genes
_nkt_beta_genes = ['Trbv1', 'Trbv13-2', 'Trbv29']
sc.pl.umap(
    adata,
    color=_nkt_beta_genes,
    palette='tab20',
    cmap='coolwarm',
    vmax=1,
)

### gamma delta T

In [None]:
# Candidate γδ T cell genes under several naming schemes; only the ones
# that exist in adata.var_names are used for the score
gamma_delta_genes = [
    'Tcrg-V4', 'Tcrg-V6', 'Tcrg-V1', 'Tcrg', 'Tcrd',
    'Trgv1', 'Trgv2', 'Trgv3', 'Trgv4', 'Trgv5', 'Trgv6', 'Trgv7',
    'Trdv1', 'Trdv3', 'Trdv4', 'Trdv5',
    'Trg', 'Trd', 'Sox13', 'Id3', 'Blk', 'Il17a', 'Ifng',
]
_known_genes = set(adata.var_names)
present_gamma_delta_genes = [g for g in gamma_delta_genes if g in _known_genes]
print("Present γδ T cell genes:", present_gamma_delta_genes)

# Per-cell sum of adata.X over the genes that were found
_gd_view = adata[:, present_gamma_delta_genes]
adata.obs['gamma_delta_score'] = _gd_view.X.sum(1)
sc.pl.umap(
    adata,
    color=['gamma_delta_score'],
    palette='tab20',
    cmap='coolwarm',
    vmax=5,
)

In [None]:
# List every gene in the dataset whose name begins with 'Tcr'
_starts_with_tcr = adata.var_names.str.startswith('Tcr')
adata.var_names[_starts_with_tcr].tolist()

In [None]:
# gamma delta T markers
_tcrg_v_genes = ['Tcrg-V4', 'Tcrg-V6', 'Tcrg-V1']
sc.pl.umap(
    adata,
    color=_tcrg_v_genes,
    palette='tab20',
    cmap='coolwarm',
    vmax=0.5,
)