In [None]:
# Script for annotating the subsetted, BBKNN-corrected T cells
# By Louise Baldwin
# Takes the BBKNN-adjusted T cell .h5ad as input and writes an annotated .h5ad


In [None]:
# Set up
###################

# import packages
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import os
import bbknn as bb
# import scvelo as scv
import scipy as sp
import matplotlib.pyplot as plt
from matplotlib.pyplot import rc_context

import anndata
import joblib
import sys

from matplotlib import rcParams
from numpy import sin
from tqdm.auto import tqdm

# directories
# Project root. Defaults to the original cluster location; set the MSC_PROJECT_DIR
# environment variable to run the notebook from another checkout. All relative
# paths below (and scanpy's figdir) are resolved against this directory.
project_dir = os.environ.get(
    "MSC_PROJECT_DIR",
    "/share/ScratchGeneral/loubal/projects/MSC/mouse-single-cell",
)
os.chdir(project_dir)
in_file = "data/processed/Subset_Tcells_BBKNN.h5ad"
results_file = "data/processed/Subset_Tcells_BBKNN_annotated.h5ad"
figdir = "outs/BBKNN_after_Tcellsubset/figures/"
tabdir = "outs/BBKNN_after_Tcellsubset/tables/"
os.makedirs(figdir, exist_ok=True)
os.makedirs(tabdir, exist_ok=True)

# set parameters for scanpy
# verbosity: errors (0), warnings (1), info (2), hints (3), detailed traceback (4)
sc.settings.verbosity = 3
sc.logging.print_header()
sc.settings.set_figure_params(dpi=150, facecolor='white')
# change default figdir to desired figdir
sc.settings.figdir = figdir
# scv.set_figure_params(vector_friendly=False)

In [None]:
# Read the input .h5ad configured as `in_file` into memory.
adata = sc.read(in_file)

In [None]:
# Bare expression: displays the summary of the loaded object as the cell output.
adata

In [None]:
# Save the 'leiden_1' UMAP twice: once with the default legend placement and
# once with the cluster labels drawn on the data.
for out_suffix, legend_opts in (
    ("_leiden1.pdf", {}),
    ("_leiden1_ondata.pdf", {"legend_loc": "on data"}),
):
    sc.pl.umap(adata, color="leiden_1", frameon=False, save=out_suffix, size=5, **legend_opts)

In [None]:
# Marker-gene panel followed by the 'leiden_1' clustering, saved with the "_markers_" suffix.
marker_panel = [
    'Cd8a', 'Cd4', 'Foxp3', 'Ncr1', 'Tigit', 'Pdcd1', 'Tcf7', 'Icos', 'Tox',
    'Cd40lg', 'Sell', 'Ncam1', 'Cxcr5', 'Ifng', 'Gata3', 'Bcl6', 'Mki67',
]
sc.pl.umap(
    adata,
    color=marker_panel + ['leiden_1'],
    s=10,
    color_map='viridis',
    legend_loc='on data',
    legend_fontsize='small',
    wspace=0.3,
    save="_markers_",
)

In [None]:
# sc.pl.heatmap(adata, var_names=['Cd8a','Cd4','Foxp3','Ncr1','Tigit', 'Pdcd1', 'Tcf7', 'Icos', 'Tox', 'Cd40lg', 'Sell', 'Ncam1', 'Cxcr5', 'Ifng', 'Gata3', 'Bcl6', 'Mki67'],
#  groupby="leiden_1.2", figsize=[5,20])

In [None]:
# UMAP coloured by the 'leiden_1.2' clustering, labels drawn on the data.
sc.pl.umap(
    adata,
    color="leiden_1.2",
    legend_loc="on data",
)

In [None]:
# Let's try to split cluster 10: re-cluster only cluster '10' of 'leiden_1'
# and store the result under 'leiden_1_sub10'.
sc.tl.leiden(
    adata,
    resolution=0.1,
    restrict_to=("leiden_1", ["10"]),
    key_added="leiden_1_sub10",
)

In [None]:
# Inspect the cluster-10 sub-clustering on the UMAP.
sc.pl.umap(adata, color="leiden_1_sub10")

In [None]:
# Same plot with the cluster labels drawn on the data.
sc.pl.umap(
    adata,
    color="leiden_1_sub10",
    legend_loc="on data",
)

In [None]:
#Let's try and split cluster 10
# NOTE(review): this call is identical to the earlier cluster-10 sub-clustering cell
# (same restrict_to, key_added and resolution), so it recomputes and overwrites
# 'leiden_1_sub10'. It looks like a leftover duplicate and can probably be removed — confirm.
sc.tl.leiden(adata, restrict_to=('leiden_1', ['10']), key_added='leiden_1_sub10', resolution=0.1)

In [None]:
# Frameless UMAP of the cluster-10 sub-clustering.
sc.pl.umap(
    adata,
    color="leiden_1_sub10",
    frameon=False,
)

In [None]:
# Re-cluster only cluster '11' of 'leiden_1' and store the result under 'leiden_1_sub11'.
sc.tl.leiden(
    adata,
    resolution=0.2,
    restrict_to=("leiden_1", ["11"]),
    key_added="leiden_1_sub11",
)

In [None]:
# Frameless UMAP of the cluster-11 sub-clustering.
sc.pl.umap(
    adata,
    color="leiden_1_sub11",
    frameon=False,
)

In [None]:
# This isn't really sorting it: we get two clusters, but they are not splitting along Cd4 and Cd8.
sc.pl.umap(
    adata,
    color=['Cd8a', 'Cd4', 'Foxp3', 'Mki67', 'leiden_1_sub11'],
    s=10,
    color_map='viridis',
    legend_loc='on data',
    legend_fontsize='small',
    wspace=0.3,
)

In [None]:
# Make sure adata.uns['log1p'] carries a 'base' entry before differential expression.
# NOTE(review): presumably a workaround for scanpy's rank_genes_groups raising
# KeyError: 'base' on objects reloaded from .h5ad — confirm.
# Only a missing 'base' is filled in: a stored (non-None) base is left intact, and
# nothing is done (instead of raising KeyError) when there is no 'log1p' entry at all.
if 'log1p' in adata.uns:
    adata.uns['log1p'].setdefault('base', None)

In [None]:
# Compare 11,1 against 11,0 (method='wilcoxon') and plot the top 20 genes.
# NOTE(review): 'leiden_1.2_sub11' is not created by any cell visible in this notebook
# (the sub-clustering cell writes 'leiden_1_sub11') — confirm the key is present in the
# input .h5ad, otherwise this fails on a fresh kernel.
sc.tl.rank_genes_groups(
    adata,
    groupby='leiden_1.2_sub11',
    groups=['11,1'],
    reference='11,0',
    method='wilcoxon',
)
sc.pl.rank_genes_groups(
    adata,
    groups=['11,1'],
    n_genes=20,
)
# Returns a lot of Hsp genes.

In [None]:
# Let's return attention to sub10: what are these cells doing, and what is the difference between them?
# NOTE(review): 'leiden_1.2_sub10' is not created by any visible cell (the sub-clustering
# cell writes 'leiden_1_sub10') — confirm it exists in the input .h5ad.
sc.pl.umap(
    adata,
    color="leiden_1.2_sub10",
    frameon=False,
)

In [None]:
# Compare 10,1 against 10,0 (method='wilcoxon') and plot the top 20 genes.
# NOTE(review): relies on 'leiden_1.2_sub10', which no visible cell creates — confirm.
sc.tl.rank_genes_groups(
    adata,
    groupby='leiden_1.2_sub10',
    groups=['10,1'],
    reference='10,0',
    method='wilcoxon',
)
sc.pl.rank_genes_groups(
    adata,
    groups=['10,1'],
    n_genes=20,
)


In [None]:
# Also, what is the difference between 10,1 and 9?
# NOTE(review): relies on 'leiden_1.2_sub10', which no visible cell creates — confirm.
sc.tl.rank_genes_groups(
    adata,
    groupby='leiden_1.2_sub10',
    groups=['10,1'],
    reference='9',
    method='wilcoxon',
)
sc.pl.rank_genes_groups(
    adata,
    groups=['10,1'],
    n_genes=20,
)

In [None]:
# Compare 8 against 7 (method='wilcoxon') and plot the top 20 genes.
# NOTE(review): relies on 'leiden_1.2_sub11', which no visible cell creates — confirm.
sc.tl.rank_genes_groups(
    adata,
    groupby='leiden_1.2_sub11',
    groups=['8'],
    reference='7',
    method='wilcoxon',
)
sc.pl.rank_genes_groups(
    adata,
    groups=['8'],
    n_genes=20,
)

In [None]:
# Selected marker genes next to the 'leiden_1.2' clustering.
sc.pl.umap(
    adata,
    color=[
        "Cd8a", "Cd4", "Foxp3", "Pdcd1", "Tigit",
        "Mki67", "Gzmb", "Gzma", "Prf1",
        "leiden_1.2",
    ],
)

In [None]:
# Compare 'leiden_1.2' cluster 7 against cluster 13 (method='wilcoxon'); plot the top 20 genes.
sc.tl.rank_genes_groups(
    adata,
    groupby='leiden_1.2',
    groups=['7'],
    reference='13',
    method='wilcoxon',
)
sc.pl.rank_genes_groups(
    adata,
    groups=['7'],
    n_genes=20,
)

In [None]:
# Is one of the Tfh clusters location-specific? Colour the UMAP by 'Tissue' to check.
sc.pl.umap(
    adata,
    color="Tissue",
    frameon=False,
)

In [None]:
# Klrb1 expression on the UMAP.
klrb1_panel = ["Klrb1"]
sc.pl.umap(adata, color=klrb1_panel)

In [None]:
# create a dictionary to map each 'leiden_1' cluster id to its annotation label
cluster2annotation = {
    '0': 'CD4 naive Igfbp4',
    '1': 'CD8 naive',
    '2': 'CD4 naive Rps',
    '3': 'CD8 Ly6c',
    '4': 'CD4 Stat1',
    '5': 'CD4 naive arhgap15',
    '6': 'Tfh',
    '7': 'Treg effector',
    '8': 'Treg resting',
    '9': 'Unknown CD4 CD8',
    '10': 'CD8 effector',
    '11': 'Mt-high',
    '12': 'Cycling',
    '13': 'CD8 Isg15',
    # NOTE(review): this label ends with a trailing space; kept as-is because downstream
    # code may match it exactly — confirm and strip if nothing depends on it.
    '14': 'CD4 memory CD69 ',
    '15': 'CD8 Trm',
}

# Fail loudly if the clustering contains ids that are not in the dictionary:
# Series.map would otherwise leave those cells as NaN in 'Tcell_type' without warning,
# and the gap would be written to the annotated output file unnoticed.
unmapped_clusters = sorted(
    set(adata.obs['leiden_1'].dropna().unique()) - set(cluster2annotation)
)
if unmapped_clusters:
    raise ValueError(
        f"No annotation label defined for leiden_1 cluster(s): {unmapped_clusters}"
    )

adata.obs['Tcell_type'] = adata.obs['leiden_1'].map(cluster2annotation).astype('category')



In [None]:
# Save the annotated UMAP (coloured by the new 'Tcell_type' labels).
sc.pl.umap(
    adata,
    color="Tcell_type",
    frameon=False,
    save="_Tcell_annotation.png",
)

In [None]:
# Write the object, now carrying the 'Tcell_type' column, to `results_file`.
# Cells below run after this write, so anything they add to `adata` is not in the saved file.
adata.write(results_file)

In [None]:
# 'leiden_1.2' clustering with labels drawn on the data, for reference in the cells below.
sc.pl.umap(
    adata,
    color="leiden_1.2",
    legend_loc="on data",
)

In [None]:
# Chemokine / Ly6 / Id3 genes next to the 'leiden_1.2' clustering.
sc.pl.umap(
    adata,
    color=[
        "Cxcr3", "Cxcl9", "Cxcl10", "Cxcr6",
        "Ly6a", "Ly6e", "Id3",
        "leiden_1.2",
    ],
)

In [None]:
# Heatmap of Cxcr3, Cxcl10 and Cxcl9, with cells grouped by 'leiden_1.2'.
sc.pl.heatmap(
    adata,
    var_names=['Cxcr3', 'Cxcl10', 'Cxcl9'],
    groupby="leiden_1.2",
)

In [None]:
# Compare 'leiden_1.2' cluster 0 against cluster 4 (method='wilcoxon'); plot the top 20 genes.
sc.tl.rank_genes_groups(
    adata,
    groupby='leiden_1.2',
    groups=['0'],
    reference='4',
    method='wilcoxon',
)
sc.pl.rank_genes_groups(
    adata,
    groups=['0'],
    n_genes=20,
)

In [None]:
def cluster_small_multiples(adata, Tissue, size=60, frameon=False, legend_loc=None, **kwargs):
    """Draw one UMAP panel per category of ``adata.obs[Tissue]``.

    In each panel the cells belonging to that category are drawn in the
    category's colour from ``adata.uns[Tissue + '_colors']``; '#d3d3d3' is
    registered as the colour of all other cells.

    Parameters
    ----------
    adata
        Annotated data object with a UMAP embedding; it is copied, not modified.
    Tissue
        Name of a categorical column in ``adata.obs`` (any grouping, not only tissue).
    size
        Accepted for interface compatibility but NOT forwarded: the point size stays
        at 5. NOTE(review): forwarding it with the current default of 60 would
        change every existing figure — decide on the intended default first.
    frameon
        Forwarded to ``sc.pl.umap``.
    legend_loc
        Forwarded to ``sc.pl.umap``.
    **kwargs
        Extra keyword arguments for ``sc.pl.umap``. ``ncols`` defaults to 2 and can
        be overridden here.

    Raises
    ------
    KeyError
        If ``adata.uns`` has no ``Tissue + '_colors'`` palette.
    """
    categories = adata.obs[Tissue].cat.categories
    if len(categories) == 0:
        # Nothing to draw; the original crashed on the unbound loop variable here.
        return

    colors_key = Tissue + '_colors'
    if colors_key not in adata.uns:
        raise KeyError(
            f"adata.uns[{colors_key!r}] is missing; plot {Tissue!r} once "
            f"(e.g. sc.pl.umap(adata, color={Tissue!r})) so the palette is assigned"
        )
    palette = adata.uns[colors_key]

    # Fixed [False, True] categories keep the two-colour list aligned even when a
    # category contains all cells or none of them.
    membership_dtype = pd.CategoricalDtype(categories=[False, True])

    tmp = adata.copy()
    for i, clust in enumerate(categories):
        tmp.obs[clust] = adata.obs[Tissue].isin([clust]).astype(membership_dtype)
        tmp.uns[clust + '_colors'] = ['#d3d3d3', palette[i]]

    # Default to two panels per row, but let the caller override it instead of
    # failing with "got multiple values for keyword argument 'ncols'".
    kwargs.setdefault('ncols', 2)
    sc.pl.umap(
        tmp,
        groups=[True],  # highlight only the member cells in every panel
        color=categories.tolist(),
        size=5,
        frameon=frameon,
        legend_loc=legend_loc,
        **kwargs,
    )


# Draw the per-category 'Tissue' panels with a small figure size applied only inside this block.
with rc_context({"figure.figsize": (3, 2.5)}):
    cluster_small_multiples(adata, "Tissue")