In [None]:
# Script for the major level annotation of clustered single cell data.
# By Louise Baldwin
# Takes the clustered, merged h5ad as input and requires a previously generated list of DEGs. Output is an annotated h5ad.

In [None]:
###################
# Set up
###################

# import packages
import numpy as np
import pandas as pd
import scanpy as sc
import os
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rcParams
from numpy import sin

# --- Paths ---
# Every path below is relative to the project root, so move there first.
os.chdir("/share/ScratchGeneral/loubal/projects/MSC/mouse-single-cell")
in_file = "data/processed/clustered_merged_umap.h5ad"
results_file = "data/processed/annotated.h5ad"
figdir = "outs/4_major_annotation/figures/"
tabdir = "outs/4_major_annotation/tables/"
# Create the output folders up front; existing folders are left untouched.
for out_dir in (figdir, tabdir):
    os.makedirs(out_dir, exist_ok=True)

# --- scanpy settings ---
# verbosity levels: 0 = errors, 1 = warnings, 2 = info, 3 = hints, 4 = detailed traceback
sc.settings.verbosity = 2
sc.logging.print_header()
sc.settings.set_figure_params(dpi=300, facecolor='white')
# Alternative figure settings kept for reference:
#sc.set_figure_params(facecolor='white', color_map="viridis")
#sc.settings.figdir='/share/ScratchGeneral/loubal/projects/MSC/mouse-single-cell/outs/QC/figures/'
# Point scanpy's figure directory at this notebook's figure folder.
sc.settings.figdir = figdir

In [None]:
# Read the clustered, merged AnnData object from in_file.
adata=sc.read_h5ad(in_file)
# Bare expression: the object's summary becomes the cell output.
adata

In [None]:
# UMAP coloured by the 'leiden_0.3' clustering column, drawn without a frame
# and saved through scanpy's figure directory.
sc.pl.umap(
    adata,
    color="leiden_0.3",
    frameon=False,
    save="_leiden03.pdf",
)

In [None]:
# UMAP coloured by the 'leiden' column, drawn twice: once with the cluster
# labels placed on the data and once with the default legend placement.
# The commented-out .savefig() suffixes of the original are kept below.
leiden_umap_kwargs = dict(
    color=['leiden'],
    size=1,
    legend_fontsize='small',
    show=False,
    wspace=0.5,
    frameon=False,
    title='leiden',
)

sc.pl.umap(adata, legend_loc='on data', save="leiden_labelled_ondata.png", **leiden_umap_kwargs)
#.savefig(sc.settings.figdir/"umap_leiden_0.3_labelled_ondata.png", bbox_inches='tight')

sc.pl.umap(adata, save="leiden_labelled.png", **leiden_umap_kwargs)
#.savefig(sc.settings.figdir/"umap_leiden_0.3_labelled.png", bbox_inches='tight')


In [None]:
# Plot marker genes: ranked genes stored under the 'rank_genes' key, three
# clusters per figure, for cluster IDs '0' through '23'.
n_clusters = 24
clusters_per_figure = 3
for first_cluster in range(0, n_clusters, clusters_per_figure):
    cluster_ids = [str(c) for c in range(first_cluster, first_cluster + clusters_per_figure)]
    sc.pl.rank_genes_groups(adata, key='rank_genes', groups=cluster_ids, fontsize=10)

In [None]:
# Quick look at the 'leiden' clustering on the UMAP (not saved).
sc.pl.umap(
    adata,
    color="leiden",
)

In [None]:
# Marker-gene UMAPs used while deciding on cluster identities. Only the second
# epithelial panel is active; the other panels are kept commented out so they
# can be re-enabled when needed.
#rcParams['figure.figsize']=(7,7)
# sc.pl.umap(adata, color='Foxp3', use_raw=False, color_map="RdPu")
# sc.pl.umap(adata, color='Cd8a', use_raw=False, color_map="RdPu")
# sc.pl.umap(adata, color='Cd4', use_raw=False, color_map="RdPu")
# sc.pl.umap(adata, color='Adgre1', use_raw=False, color_map="RdPu")
# sc.pl.umap(adata, color='Cxcl3', use_raw=False, color_map="RdPu")

# epithelial
# sc.pl.umap(adata, color=["Krt18", "Ccnd1", "Muc1"], save="_epithelial_markers.png")
epithelial_markers_2 = ["Krt8", "Krt19", "Krt7"]
sc.pl.umap(adata, color=epithelial_markers_2, save="_epithelial_markers_2.png")

# fibroblast
# sc.pl.umap(adata, color=["Col1a1", "Acta2", "Vim"], save="_fibroblast_markers.png")
# B cells
# sc.pl.umap(adata, color=["Ptprc", "Cd19", "Ighm"], save="_Bcell_markers.png")
# T cells
# sc.pl.umap(adata, color=["Cd4", "Cd8a", "Foxp3"], save="_Tcell_markers.png")
# myeloid
# sc.pl.umap(adata, color=["Itgam", "Adgre1", "Ly6c1"], save="_myeloid_markers.png")
# other
# sc.pl.umap(adata, color=["Ncr1", "Srgn", "Siglech"], save="other_markers.png")


In [None]:
# Show the 'leiden_0.3' clustering again as a reference for the mapping below
# (not saved).
sc.pl.umap(
    adata,
    color="leiden_0.3",
)

In [None]:
# Major cell type assigned to each 'leiden_0.3' cluster ID. Several clusters
# share a label, so the mapping is many-to-one.
cluster2annotation = {
     '0': 'CD4 T cell',
     '1': 'B cell',
     '2': 'CD8 T cell',
     '3': 'Epithelial',
     '4': 'CD4 T cell',
     '5': 'Myoepithelial',
     '6': 'Myoepithelial',
     '7': 'Myeloid',
     '8': 'Mast cell',
     '9': 'NK cell',
     '10': 'CD4 T cell',
     '11': 'Fibroblast',
     '12': 'Epithelial',
     '13': 'B cell',
     '14': 'pDC',
     '15': 'Unknown',
     '16': 'B cell',
     '17': 'Myeloid',
     '18': 'B cell',
     '19': 'Myeloid',
     '20': 'Myoepithelial',
     '21': 'Myoepithelial',
     '22': 'Epithelial',
     '23': 'Epithelial',
}

# Series.map leaves NaN for any value that is not a key of the dict, which
# would silently drop those cells from every downstream table and plot.
# Check the mapped result and stop with an explicit message instead.
cell_types = adata.obs['leiden_0.3'].map(cluster2annotation)
unlabelled = cell_types.isna()
if unlabelled.any():
    missing_ids = sorted(adata.obs.loc[unlabelled, 'leiden_0.3'].astype(str).unique())
    raise ValueError(
        f"No entry in cluster2annotation for leiden_0.3 cluster(s): {missing_ids}"
    )

adata.obs['Cell_type'] = cell_types.astype('category')

In [None]:
# Load the saved annotation only when no annotated object is already in memory.
# An unconditional reload here would replace the 'Cell_type' column that was
# just assigned with whatever is on disk, and it fails on a first run because
# results_file is only written further down the notebook. The guard keeps the
# shortcut of starting from the saved file in a fresh kernel.
if "adata" not in globals() or "Cell_type" not in adata.obs.columns:
    adata = sc.read_h5ad(results_file)

In [None]:
#rcParams['figure.figsize']=(7,7)

# UMAP coloured by the major-level annotation, frameless, saved via scanpy.
sc.pl.umap(
    adata,
    color=['Cell_type'],
    frameon=False,
    save="_major_annotation.pdf",
)

# Earlier variant kept for reference:
# sc.pl.umap(adata, color=['mcCell_type'], size=1,
#  show=False, return_fig=True, ncols=2, wspace=0.8, 
#    title='Major level annotation').savefig(sc.settings.figdir/"annotated_leiden_0.3.png")

In [None]:
# Bare expression: show the AnnData summary (e.g. to confirm 'Cell_type' is in obs).
adata

In [None]:
# Cell-type composition tables: number of cells of each 'Cell_type' per
# 'Tissue' and per 'ReactionID'. Each table is built once and then written to
# tabdir; the original built every table twice and discarded the first copy.
cells_per_tissue = pd.crosstab(adata.obs['Tissue'], adata.obs['Cell_type'])
cells_per_reaction = pd.crosstab(adata.obs['ReactionID'], adata.obs['Cell_type'])

# os.path.join gives the same path whether or not tabdir ends with a slash.
cells_per_tissue.to_csv(os.path.join(tabdir, 'cells_per_tissue.csv'))
cells_per_reaction.to_csv(os.path.join(tabdir, 'cells_per_reaction.csv'))

In [None]:
# Number of cells of each 'Cell_type' per 'Model', written to tabdir.
cells_per_model = pd.crosstab(adata.obs['Model'], adata.obs['Cell_type'])
cells_per_model.to_csv(tabdir+'cells_per_model.csv')

In [None]:
# Show both composition tables. A notebook cell only renders its last bare
# expression, so the Tissue table was previously computed and thrown away;
# display() (provided by the IPython kernel) renders each one.
display(
    pd.crosstab(adata.obs['Tissue'], adata.obs['Cell_type']),
    pd.crosstab(adata.obs['ReactionID'], adata.obs['Cell_type']),
)

In [None]:
# Bar chart of the number of cells per annotated cell type, largest first.
# sns.set(rc={"figure.figsize":(15, 3)}) #width=3, #height=4

# value_counts() counts rows directly and returns them sorted descending, so
# the plot no longer depends on an unrelated 'n_counts' column being present
# and free of missing values.
counts = (
    adata.obs['Cell_type']
    .value_counts()
    .rename_axis('Cell_type')
    .reset_index(name='n_cells')
)

sns.set_context("paper")
# Explicit figure/axes so every later call targets the figure that is saved.
fig, ax = plt.subplots(figsize=(8, 3))
sns.barplot(
    x='Cell_type',
    y='n_cells',
    data=counts,
    order=counts['Cell_type'].tolist(),  # keep the descending-count order
    ax=ax,
)
#ax.set_yscale("log")
ax.set_ylabel("Number of cells")
ax.grid(False)
ax.bar_label(ax.containers[0], fontsize=9)
ax.tick_params(axis='x', labelrotation=90)
# despine has to run on the axes that hold the bars; calling it before the
# figure was created (as before) acted on a different figure.
sns.despine(ax=ax)
fig.savefig((figdir+"cells_per_cell_type.pdf"), bbox_inches="tight")

In [None]:
# Save the annotated AnnData object (including obs['Cell_type']) to results_file.
adata.write_h5ad(results_file)

In [None]:
# Re-read the annotated object from disk.
# NOTE(review): this rebinds in_file to the annotated output, so re-running the
# earlier load cell afterwards would read the annotated file rather than the
# clustered input — confirm this is intended.
in_file=results_file
adata = sc.read_h5ad(in_file)

In [None]:
# UMAP of the reloaded object coloured by 'Cell_type', frameless and saved.
sc.pl.umap(
    adata,
    color="Cell_type",
    frameon=False,
    save="_annotated.pdf",
)

In [None]:
# Figuring out the myofibroblast cluster: are they myoepithelial cells?
myoepithelial_genes = ["Tagln", "Oxtr", "Mme"]
sc.pl.umap(adata, color=myoepithelial_genes, save="_myoepithelial_markers.pdf")

In [None]:
# Gene panel saved under the "_icaf_mycaf_markers" figure name.
icaf_mycaf_genes = ["Acta2", "Pdpn", "Cxcl12"]
sc.pl.umap(adata, color=icaf_mycaf_genes, save="_icaf_mycaf_markers.pdf")

In [None]:
# Two keratin gene panels, one UMAP figure per panel (not saved).
keratin_panels = (
    ["Krt5", "Krt14", "Krt17"],
    ["Krt8", "Krt18", "Krt19"],
)
for keratin_genes in keratin_panels:
    sc.pl.umap(adata, color=keratin_genes)

In [None]:
# UMAP coloured by the 'Treatment' column of obs (not saved).
sc.pl.umap(
    adata,
    color="Treatment",
)