In [5]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import copy
import sys
from importlib import reload
from pathlib import Path
import scanpy as sc
from pylab import rcParams
from matplotlib.patches import Patch
import re
import seaborn as sns
import glob
import matplotlib
from scipy.stats import zscore

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

import scvelo as scv
scv.logging.print_version()


scv.settings.verbosity = 3  # show errors(0), warnings(1), info(2), hints(3)
scv.settings.set_figure_params('scvelo')  # for beautified visualization

%matplotlib inline

Running scvelo 0.2.2 (python 3.8.3) on 2021-05-22 09:26.


In [6]:
def adata_louvain_name_convert(inputAdata, orderDf, louvainNewOrdered, louvainOldOrdered):
    """Relabel louvain clusters in place and add derived annotation columns to ``.obs``.

    Parameters
    ----------
    inputAdata
        Object exposing ``.obs`` (with ``'louvain'`` and ``'cell_type'`` columns) and
        ``.uns`` (with a ``'louvain_colors'`` entry). It is modified in place.
    orderDf
        DataFrame with ``'louvain'`` and ``'louvain_new_label'`` columns; the new labels,
        sorted by the old ``'louvain'`` value, define the category order.
    louvainNewOrdered, louvainOldOrdered
        Parallel sequences: the integer id ``louvainOldOrdered[i]`` is renamed to
        ``louvainNewOrdered[i]``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a cell's louvain value cannot be converted to ``int`` or is not listed in
        ``louvainOldOrdered``.

    Notes
    -----
    Not idempotent: after one call ``.obs['louvain']`` holds the new labels, so a second
    call on the same object fails unless those labels are still integer-like.
    """
    inputAdata.obs["louvain_celltype"] = ["%s_%s"%(x,y) for x, y in zip(inputAdata.obs['louvain'], inputAdata.obs['cell_type'])]
    # First character of the cell type string is used as the general cell type
    inputAdata.obs["celltype_general"] = [x[0] for x in inputAdata.obs['cell_type']]

    #--- Match old & new louvain levels
    inputAdata.obs['louvain_old'] = inputAdata.obs['louvain']
    inputAdata.uns['louvain_old_colors'] = inputAdata.uns['louvain_colors']

    # Build the old -> new lookup once instead of scanning the list for every cell.
    # setdefault keeps the first occurrence, matching list.index() semantics.
    old_to_new = {}
    for old_id, new_label in zip(louvainOldOrdered, louvainNewOrdered):
        old_to_new.setdefault(old_id, new_label)
    try:
        new_labels = [old_to_new[int(x)] for x in inputAdata.obs['louvain']]
    except KeyError as err:
        raise ValueError("louvain cluster %s is not in louvainOldOrdered" % err) from None
    inputAdata.obs['louvain'] = new_labels

    # Only keep categories that actually occur; the set is computed once, not per candidate
    labels_present = set(new_labels)
    category_order = [x for x in orderDf.sort_values('louvain')['louvain_new_label'].tolist() if x in labels_present]
    inputAdata.obs['louvain'] = pd.Categorical(new_labels, categories=category_order) # The key is to set categories according to original order...

    inputAdata.obs['Infection type'] = [ 'Armstrong' if 'A' in x else 'Clone13' if 'C' in x else 'Naive' for x in inputAdata.obs['cell_type']]
    inputAdata.obs['cell_id'] = inputAdata.obs.index.tolist()

In [7]:
###----- Get dir & load / reload packages
# `code_dir` doubles as the "has this cell run before?" flag for the current kernel.
if "code_dir" in globals():
    # Paths were already resolved on a previous run: only refresh the helper modules
    print("Reload")
    reload(sc_addons)
    reload(sc_pl_addons)
else:
    # First run: derive project paths from the current working directory
    print("Start")
    code_dir = os.getcwd()
    base_dir = code_dir.replace("/plotting_codes","")
    tools_dir = base_dir + "/tools"

    # Make the project's tools directory importable, then load the helper modules
    sys.path.insert(1, tools_dir)
    import sc_addons
    import sc_pl_addons

Start


In [8]:
## ----- Setup working dir
sp_name = "0_Acute-Chronic"
analysis_name = ["0", "resampled"]

# Output folder for this notebook; created if missing, then made the working directory
wk_dir = base_dir + "/9_Figures/%s/%s_%s/scVelo"%(sp_name, *analysis_name)
os.makedirs(wk_dir, exist_ok=True)
os.chdir(wk_dir)

print(sp_name)

0_Acute-Chronic


In [9]:
###----- old - new cluster names
# CSV mapping the original louvain ids ('louvain') to the new labels ('louvain_new_label')
order_file = (
    '%s/%s/2_scVelo'%(base_dir, sp_name)
    + '/%s_scVelo_out_%sNoDyn/pt_ordered_cluster_avg.csv'%tuple(analysis_name)
)
order_df = pd.read_csv(order_file)
louvain_new_ordered, louvain_old_ordered = (
    order_df[column].tolist() for column in ('louvain_new_label', 'louvain')
)

In [10]:
# Read the scVelo AnnData file and relabel its louvain clusters with the new names.
# NOTE(review): this is an absolute, machine-specific path rather than one derived from
# `base_dir`. It also points at `0_scVelo_out_resampled`, while the cluster-order CSV is read
# from a `..._resampledNoDyn` folder — confirm the two are meant to differ.
resampled_h5ad = '/media/pipkin/ROCKET-PRO/CD8_DEV_SC/0_Acute-Chronic/2_scVelo/0_scVelo_out_resampled/0_Acute-Chronic_scVelo_postPT.h5ad'
adata = scv.read(resampled_h5ad)
# Mutates `adata` in place (adds .obs columns and replaces .obs['louvain']); it is called in the
# same cell as the load so that re-running the cell always starts from a freshly read object.
adata_louvain_name_convert(adata, order_df, louvain_new_ordered, louvain_old_ordered)

In [39]:
# Both subsets drop the 'NP14B' cells; the first also drops every cell type containing 'C',
# the second every cell type containing 'A'.
adata_act_arm = adata[[x != 'NP14B' and 'C' not in x for x in adata.obs['cell_type']]]

adata_act_cl13 = adata[[x != 'NP14B' and 'A' not in x for x in adata.obs['cell_type']]]

In [40]:
# Bounds of the two columns of adata.obsm['paga'], padded by 10% of the range on each side
paga_xmin, paga_ymin = np.min(adata.obsm['paga'], axis=0)
paga_xmax, paga_ymax = np.max(adata.obsm['paga'], axis=0)
paga_dx = paga_xmax - paga_xmin
paga_dy = paga_ymax - paga_ymin
paga_xmin, paga_xmax = paga_xmin - paga_dx*0.1, paga_xmax + paga_dx*0.1
paga_ymin, paga_ymax = paga_ymin - paga_dy*0.1, paga_ymax + paga_dy*0.1

In [41]:
### Intersect chromatin regulators & TF & cell surface receptors
# Reference tables are fetched from GitHub; `all_genes` is the union of the three gene lists.
surface_df = pd.read_csv('https://raw.githubusercontent.com/Yolanda-HT/SurfaceMarkers_Cytokines/master/2_compiled/MM_MARKERS.csv')
surface_genes = surface_df['gene_name'].tolist()
crf_df = pd.read_csv('https://raw.githubusercontent.com/ScrippsPipkinLab/CRF_Screen/master/Ref/CRF_all.csv')
# Include alternative gene names, skipping missing (NaN) entries
crf_genes = crf_df['gene_name'].tolist() + [x for x in crf_df['Alternative'].tolist() if str(x) != 'nan']
tf_df = pd.read_csv('https://raw.githubusercontent.com/Yolanda-HT/TFclassDataCollection/master/Ravasi_TF.csv')
tf_genes = tf_df['symbol'].tolist()
all_genes = list(set(surface_genes + crf_genes + tf_genes))

### Gene signature genes
# Keep only the signatures whose 'plot_use' column is filled in
gs_plot_use_file = '/media/pipkin/ROCKET-PRO/T_cell_signature_Reference/Y_annotated/anno_plotuse_gs_20200928.csv'
gs_plot_use_df = pd.read_csv(gs_plot_use_file)
gs_use = gs_plot_use_df[gs_plot_use_df['plot_use'].notnull()]['gs_name'].tolist()

gs_file = '/media/pipkin/ROCKET-PRO/T_cell_signature_Reference/X_GeneSignatures_mm/all_mouse_T_cell_signatures.csv'
gs_df = pd.read_csv(gs_file)
# Set lookups replace repeated list scans (same membership result, linear instead of quadratic)
gs_use_lookup = set(gs_use)
gs_df_use = gs_df[[x in gs_use_lookup for x in gs_df['gs_name']]]
genes_use = list(set(gs_df_use['gene_symbol'].tolist()))

# Restrict signature genes to surface markers / chromatin regulators / TFs
all_genes_lookup = set(all_genes)
genes_use = [i for i in genes_use if i in all_genes_lookup]
#genes_use = all_genes

In [42]:
# Create the output sub-folders under wk_dir; `out_dir` is left pointing at the last one.
for out_subdir in ('/trajectory_drivers', '/likelihood_genes_drivers', '/pairwise_transition_prob'):
    out_dir = wk_dir + out_subdir
    Path(out_dir).mkdir(parents=True, exist_ok=True)

In [43]:
def extract_avg(adata_use, gene_list, layer):
    """Return the mean of ``layer`` over all observations for each gene in ``gene_list``.

    Parameters
    ----------
    adata_use
        Object exposing ``.layers[layer]`` (a 2-D observations x variables array, dense or
        with a ``toarray()`` method) and ``.var.index`` (the variable names, in column order).
    gene_list
        Variable names to average; each must be present in ``adata_use.var.index``.
    layer
        Key of the layer to read.

    Returns
    -------
    list
        One mean per entry of ``gene_list``, in the same order.

    Raises
    ------
    KeyError
        If ``layer`` is not a layer of ``adata_use`` or a gene is not in ``.var.index``.
    """
    # Read from the argument itself (the previous version referenced an unrelated global name)
    values = adata_use.layers[layer]
    if hasattr(values, 'toarray'):
        # Sparse layers must be densified before building the DataFrame
        values = values.toarray()
    df = pd.DataFrame(values, columns=adata_use.var.index)
    return df[gene_list].mean(axis=0).tolist()

# Driver gene heatmap for each trajectory

In [70]:
# Output folder (created if missing) for the per-trajectory driver-gene heatmaps and CSVs
drivers_heatmap_dir = wk_dir + '/likelihood_genes_heatmap'
Path(drivers_heatmap_dir).mkdir(parents=True, exist_ok=True)

In [148]:
# For each trajectory (ordered list of louvain clusters), plot a scVelo heatmap of the driver
# genes and save the per-cluster average values next to it.
# NOTE(review): `drivers_all` and `layer_group_average_slt_genes` are not defined in the cells
# shown here; they must already exist in the kernel before this cell runs.
trajectory_clusters = [['P2', 'P5', 'P3'], ['P2', 'P5', 'P6'], ['P2', 'P7', 'P9', 'P10'], ['P2', 'P4', 'P9','P10'], ['P2','P6','P8','P9','P10']]
trajectory_clusters = [['P2', 'P5', 'P3']] # Test only

# layer -> colormap, and output name -> accepted 'celltype_general' values.
# These do not depend on the trajectory, so they are defined once outside the loop.
layer_use_dict = {'Ms':'viridis', 'Mu':'viridis', 'velocity':'icefire'}
celltype_use_dict = {'Arm': ['A'], 'Cl13': ['C'], 'AllAct':['A','C']}

layer_use_dict = {'velocity':'icefire'} # Test only
celltype_use_dict = {'Arm': ['A']} # Test only

for subset_clusters in trajectory_clusters:
    subset_name = drivers_heatmap_dir + '/' + "-".join(subset_clusters)
    # Explicit copy: .obs is assigned below, which on a view would trigger an implicit copy
    # and the "Trying to set attribute `.obs` of view, copying." warning.
    adata_subset = adata[[x in subset_clusters for x in adata.obs['louvain']]].copy()
    # Position of each cluster along the trajectory (0.0, 0.1, ...), used to sort the cells
    order_dict = {cluster: rank*0.1 for rank, cluster in enumerate(subset_clusters)}
    adata_subset.obs['louvain_order'] = [order_dict[x] for x in adata_subset.obs['louvain']]

    for layer_use, layer_use_col in layer_use_dict.items():
        for celltype_name, celltype_use in celltype_use_dict.items():
            # Subsetting yields a view, so adata_subset stays intact without a full copy
            adata_plot = adata_subset[[x in celltype_use for x in adata_subset.obs['celltype_general']]]
            if layer_use == 'velocity':
                # Drop genes whose velocity column sums to NaN
                not_na_axis0 = [ not np.isnan(x) for x in adata_plot.layers['velocity'].sum(axis=0)]
                adata_plot = adata_plot[:, not_na_axis0]
            hm = scv.pl.heatmap(adata_plot, var_names=list(drivers_all), sortby='louvain_order', col_color='louvain', layer=layer_use, show=False,color_map=layer_use_col)
            out_prefix = subset_name +  "_" + celltype_name +  "_" + layer_use
            hm.savefig(out_prefix + '.png')

            # Cell type average value, rows reordered to match the heatmap
            mean_df = layer_group_average_slt_genes(adata_plot, layer_use, list(drivers_all), 'louvain', subset_clusters, celltype_name)
            mean_df = mean_df.loc[hm.data.index.tolist()]

            mean_df.to_csv(out_prefix + '.csv')
            plt.close()

Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.


In [166]:
# Disabled (`if False`): would save a stand-alone 'icefire' colorbar image.
# The heatmap itself is drawn on a second, unsaved figure (ax2); only the figure that holds
# the colorbar axis (ax) is written to disk. Relies on `hm` left over from the loop cell above.
if False:
    cbar1 = drivers_heatmap_dir + '/icefire.png'
    fig, ax = plt.subplots()
    fig2, ax2 = plt.subplots()
    newhm = sns.heatmap(hm.data2d, vmax = 1, vmin=0, cmap='icefire', ax=ax2, cbar_ax=ax)
    fig.savefig(cbar1)

In [165]:
# Disabled (`if False`): would save a stand-alone 'viridis' colorbar image.
# The heatmap itself is drawn on a second, unsaved figure (ax2); only the figure that holds
# the colorbar axis (ax) is written to disk. Relies on `hm` left over from the loop cell above.
if False:
    cbar1 = drivers_heatmap_dir + '/viridis.png'
    fig, ax = plt.subplots()
    fig2, ax2 = plt.subplots()
    newhm = sns.heatmap(hm.data2d, vmax = 1, vmin=0, cmap='viridis', ax=ax2, cbar_ax=ax)
    fig.savefig(cbar1)

## Annotate genes

In [72]:
# Output folder (created if missing) for the heatmaps with selected gene labels.
# NOTE: rebinds `drivers_heatmap_dir`, so cells run after this one write to the new folder.
drivers_heatmap_dir = wk_dir + '/likelihood_genes_heatmap_relabel_genes'
Path(drivers_heatmap_dir).mkdir(parents=True, exist_ok=True)

In [18]:
# For each trajectory and infection type: plot the driver-gene heatmap with only the genes of
# `label_list` labelled, save labelled / unlabelled versions, then save the per-cluster
# averages of the labelled genes as a heatmap and a CSV.
# NOTE(review): `drivers_all` and `layer_group_average_slt_genes` are not defined in the cells
# shown here; they must already exist in the kernel before this cell runs.
trajectory_clusters = [['P2', 'P5', 'P3'], ['P2', 'P5', 'P6'], ['P2', 'P7', 'P9', 'P10'], 
                       ['P2', 'P4', 'P9','P10'], ['P2','P6','P8','P9','P10']]
# trajectory_clusters = [['P2', 'P5', 'P3']] # Test only

# Genes to annotate; kept constant for the whole cell (see labels_present below)
label_list = ["Zeb2", "Tbx21", "Id2", "Prdm1", "Sell", "Slamf6", "Id3", "Tcf7",
             "Cd69", "Itgae", "Ccr9", "Runx3"]

# layer -> colormap, and output name -> accepted 'celltype_general' values (loop-invariant)
layer_use_dict = {'velocity':'icefire'} # 'Ms':'viridis', 'Mu':'viridis', 
celltype_use_dict = {'Arm': ['A'], 'Cl13': ['C']}

# layer_use_dict = {'Ms':'viridis'} # Test only
# celltype_use_dict = {'Arm': ['A']} # Test only

for subset_clusters in trajectory_clusters:
    subset_name = drivers_heatmap_dir + '/' + "-".join(subset_clusters)
    # Explicit copy: .obs is assigned below, which on a view would trigger an implicit copy
    # and the "Trying to set attribute `.obs` of view, copying." warning.
    adata_subset = adata[[x in subset_clusters for x in adata.obs['louvain']]].copy()
    # Position of each cluster along the trajectory (0.0, 0.1, ...), used to sort the cells
    order_dict = {cluster: rank*0.1 for rank, cluster in enumerate(subset_clusters)}
    adata_subset.obs['louvain_order'] = [order_dict[x] for x in adata_subset.obs['louvain']]

    for layer_use, layer_use_col in layer_use_dict.items():
        for celltype_name, celltype_use in celltype_use_dict.items():
            # Subsetting yields a view, so adata_subset stays intact without a full copy
            adata_plot = adata_subset[[x in celltype_use for x in adata_subset.obs['celltype_general']]]
            if layer_use == 'velocity':
                # Drop genes whose velocity column sums to NaN
                not_na_axis0 = [ not np.isnan(x) for x in adata_plot.layers['velocity'].sum(axis=0)]
                adata_plot = adata_plot[:, not_na_axis0]

            hm = scv.pl.heatmap(adata_plot, var_names=list(drivers_all), sortby='louvain_order', 
                                col_color='louvain', layer=layer_use, show=False,color_map=layer_use_col)
            out_prefix = subset_name +  "_" + celltype_name +  "_" + layer_use

            # Labels available in THIS heatmap. A per-iteration list is used so that a gene
            # missing here is not dropped from `label_list` for all later heatmaps.
            gene_order = hm.data.index.tolist()
            labels_present = [x for x in hm.data2d.index if x in label_list]
            # +0.5 puts each tick at the centre of its heatmap row
            label_loc = [gene_order.index(x) + 0.5 for x in labels_present]

            ax = hm.ax_heatmap
            ax.yaxis.set_ticks(label_loc)
            ax.set_yticklabels(labels_present)
            hm.savefig(out_prefix + '.png')
            # Same figure again with the tick positions kept but the text blanked
            ax.set_yticklabels(["" for x in labels_present])
            hm.savefig(out_prefix + '_nolabels.png')
            plt.close()

            # Cell type average value for labeled gene
            mean_df = layer_group_average_slt_genes(adata_plot, layer_use, list(drivers_all), 'louvain', subset_clusters, celltype_name)
            mean_df = mean_df.loc[gene_order]

            label_df = pd.DataFrame({'annotated_genes': labels_present})
            label_df = label_df.set_index('annotated_genes').join(mean_df, how='left') # only labeled genes

            # NOTE(review): this figure is not closed here, so one stays open per iteration
            plt.figure(figsize = (5,10))
            sns.heatmap(label_df, cmap='vlag', vmax=1, vmin=-1)
            plt.savefig(out_prefix + '_avg.png') # save avg heatmap

            # 1-based rank of each labelled gene among all genes of the heatmap
            label_df['order - total %s'% len(hm.data)] = [gene_order.index(x) + 1 for x in labels_present]
            label_df.to_csv(out_prefix + '.csv')

Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
Trying to set attrib

In [34]:
# Display `mean_df` as left in the kernel by the last iteration of the heatmap loop above
# (hidden state: this only reflects whichever trajectory / cell type was processed last).
mean_df

Unnamed: 0,Cl13_P2,Cl13_P6,Cl13_P8,Cl13_P9,Cl13_P10
Foxp1,-0.067417,-0.088733,-0.081198,-0.103892,-0.116543
Cd55,-0.016700,-0.054678,-0.054267,-0.032691,-0.050580
Foxm1,0.033839,0.001858,0.017334,0.026224,0.032578
Treml2,0.174560,0.040258,0.000922,-0.013716,0.000000
Klrd1,0.269517,-0.040026,0.072878,-0.652999,-0.116160
...,...,...,...,...,...
Slc3a2,-0.064850,0.018579,-0.051713,0.070782,0.041483
Rbbp7,0.041107,0.054939,-0.309959,0.257367,0.440128
Brd9,0.034774,0.001347,-0.028489,0.081589,0.078523
Prmt1,0.017961,-0.024582,-0.149717,0.069749,0.208329
