In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import copy
import sys
from importlib import reload
from pathlib import Path
import scanpy as sc
from pylab import rcParams
from matplotlib.patches import Patch
import re
import seaborn as sns
import glob

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

import scvelo as scv
scv.logging.print_version()


scv.settings.verbosity = 3  # show errors(0), warnings(1), info(2), hints(3)
scv.settings.set_figure_params('scvelo')  # for beautified visualization

%matplotlib inline

Running scvelo 0.2.2 (python 3.8.3) on 2020-11-16 16:28.


In [2]:
###----- Get dir & load / reload packages
# Resolve the project directories only once per kernel: a later cell calls
# os.chdir(), so os.getcwd() would no longer be the notebook directory when
# this cell is re-run.
if "code_dir" not in globals():
    print("Start")
    code_dir = os.getcwd()
    base_dir = code_dir.replace("/plotting_codes","")
    tools_dir = base_dir + "/tools"

    # Make the project's helper modules in <base_dir>/tools importable.
    sys.path.insert(1, tools_dir)
else:
    print("Reload")

# Import on first use, reload afterwards. The decision is keyed on the modules
# themselves rather than on `code_dir`, so that a failed first import can be
# retried by re-running the cell instead of raising NameError in reload().
if "sc_addons" in globals() and "sc_pl_addons" in globals():
    reload(sc_addons)
    reload(sc_pl_addons)
else:
    import sc_addons
    import sc_pl_addons

Start


In [3]:
## ----- Setup working dir
sp_name = "1_IL2RAKO"
analysis_name = ["0", "all"]

# Figures are written to <base_dir>/9_Figures/<sp_name>/<analysis_name[0]>_<analysis_name[1]>/scVelo
figure_subdir = "/9_Figures/%s/%s_%s/scVelo" % (sp_name, analysis_name[0], analysis_name[1])
wk_dir = base_dir + figure_subdir

# Create the directory (with parents) if needed and make it the current
# directory, so relative output paths used afterwards resolve inside it.
os.makedirs(wk_dir, exist_ok=True)
os.chdir(wk_dir)

print(sp_name)

1_IL2RAKO


In [4]:
###----- old - new cluster names
# Table holding both the original ('louvain') and the renamed
# ('louvain_new_label') cluster labels; both lists keep the file's row order.
order_file = '/media/pipkin/ROCKET-PRO/CD8_DEV_SC/1_IL2RAKO/1_Scanpy/0-all_Scanpy_out/0_sum/1_IL2RAKO_obs_louvainAvg.csv'
order_df = pd.read_csv(order_file)
louvain_new_ordered, louvain_old_ordered = (
    order_df[label_col].tolist() for label_col in ('louvain_new_label', 'louvain')
)

In [5]:
# scVelo result files, one per subset directory:
# 0-all, 1-ACTonly, 2-actWT and 2-actKO.
all_h5ad = '/media/pipkin/ROCKET-PRO/CD8_DEV_SC/1_IL2RAKO/2_scVelo/0-all_Dyn_scVelo_out/1_IL2RAKO_scVelo.h5ad'
act_h5ad = '/media/pipkin/ROCKET-PRO/CD8_DEV_SC/1_IL2RAKO/2_scVelo/1-ACTonly_Dyn_scVelo_out/1_IL2RAKO_scVelo.h5ad'
act_wt_h5ad = '/media/pipkin/ROCKET-PRO/CD8_DEV_SC/1_IL2RAKO/2_scVelo/2-actWT_Dyn_scVelo_out/1_IL2RAKO_scVelo.h5ad'
act_ko_h5ad = '/media/pipkin/ROCKET-PRO/CD8_DEV_SC/1_IL2RAKO/2_scVelo/2-actKO_Dyn_scVelo_out/1_IL2RAKO_scVelo.h5ad'

# Read the four files in the order listed above.
adata, adata_act, adata_act_wt, adata_act_ko = [
    scv.read(h5ad_path)
    for h5ad_path in (all_h5ad, act_h5ad, act_wt_h5ad, act_ko_h5ad)
]

In [6]:
# Bounding box of the coordinates stored in adata.obsm['paga'], widened by 10%
# of the span on every side (presumably used as shared plot limits -- verify
# against the plotting cells).
paga_coords = adata.obsm['paga']
paga_xmin, paga_ymin = np.amin(paga_coords, axis=0)
paga_xmax, paga_ymax = np.amax(paga_coords, axis=0)

paga_dx = paga_xmax - paga_xmin
paga_dy = paga_ymax - paga_ymin

paga_xmin, paga_xmax = paga_xmin - paga_dx*0.1, paga_xmax + paga_dx*0.1
paga_ymin, paga_ymax = paga_ymin - paga_dy*0.1, paga_ymax + paga_dy*0.1

In [7]:
### Intersect chromatin regulators & TF & cell surface receptors
surface_df = pd.read_csv('https://raw.githubusercontent.com/Yolanda-HT/SurfaceMarkerSelect/master/MM_MARKERS.csv')
surface_genes = surface_df['gene_name'].tolist()

# The CRF table was previously fetched with an access token hard-coded in the
# URL. Credentials do not belong in the notebook, and the recorded run of this
# cell ended in "HTTP Error 404" (NOTE(review): presumably the token had
# expired -- confirm which URL failed). Point CRF_ALL_CSV at a local copy or an
# accessible URL; the token-less URL below is only the fallback.
crf_file = os.environ.get(
    'CRF_ALL_CSV',
    'https://raw.githubusercontent.com/ScrippsPipkinLab/CRF_Screen/master/Ref/CRF_all.csv',
)
crf_df = pd.read_csv(crf_file)
# Primary symbols plus the alternative symbols that are actually filled in.
crf_genes = crf_df['gene_name'].tolist() + crf_df['Alternative'].dropna().tolist()

tf_df = pd.read_csv('https://raw.githubusercontent.com/Yolanda-HT/TFclassDataCollection/master/Ravasi_TF.csv')
tf_genes = tf_df['symbol'].tolist()

# Union of the three gene families, de-duplicated.
all_genes = list(set(surface_genes + crf_genes + tf_genes))

### Gene signature genes
# Keep only the signatures that have a value in the 'plot_use' column.
gs_plot_use_file = '/media/pipkin/ROCKET-PRO/T_cell_signature_Reference/Y_annotated/anno_plotuse_gs_20200928.csv'
gs_plot_use_df = pd.read_csv(gs_plot_use_file)
gs_use = gs_plot_use_df[gs_plot_use_df['plot_use'].notnull()]['gs_name'].tolist()

gs_file = '/media/pipkin/ROCKET-PRO/T_cell_signature_Reference/X_GeneSignatures_mm/all_mouse_T_cell_signatures.csv'
gs_df = pd.read_csv(gs_file)
gs_df_use = gs_df[gs_df['gs_name'].isin(gs_use)]
genes_use = list(set(gs_df_use['gene_symbol'].tolist()))

# Final list: signature genes that are also surface markers, CRFs or TFs.
# A set is used for the lookup so the filter is linear instead of quadratic.
all_genes_lookup = set(all_genes)
genes_use = [i for i in genes_use if i in all_genes_lookup]

HTTPError: HTTP Error 404: Not Found

In [None]:
# Output folder for the likelihood-gene tables, created inside the working directory.
out_dir = wk_dir + '/likelihood_genes_drivers'
os.makedirs(out_dir, exist_ok=True)

## Extract likelihood genes for activated cells: WT vs. KO (plus all activated cells combined)

In [12]:
def _ranked_likelihood_genes(adata_ij, keep_genes):
    """Return the positive 'fit_likelihood' values of `adata_ij`, best first.

    Parameters
    ----------
    adata_ij
        Object whose `.var` table has a 'fit_likelihood' column indexed by gene.
    keep_genes
        Collection of gene names; genes outside it are dropped.

    Returns
    -------
    pandas.Series
        Likelihoods sorted in descending order, restricted to values > 0
        (which also drops missing values) and to genes in `keep_genes`.
    """
    likelihood = adata_ij.var['fit_likelihood'].sort_values(ascending=False)
    likelihood = likelihood[likelihood > 0]
    return likelihood[likelihood.index.isin(keep_genes)]


# WT
wt_ranked = _ranked_likelihood_genes(adata_act_wt, genes_use)
wt_top_genes = wt_ranked.index.tolist()
wt_top_genes_score = wt_ranked.tolist()

# KO
ko_ranked = _ranked_likelihood_genes(adata_act_ko, genes_use)
ko_top_genes = ko_ranked.index.tolist()
ko_top_genes_score = ko_ranked.tolist()

# All
all_ranked = _ranked_likelihood_genes(adata_act, genes_use)
all_top_genes = all_ranked.index.tolist()
all_top_genes_score = all_ranked.tolist()

# Leave the scratch names bound to the same objects as before, in case later
# cells still read them.
adata_ij, top_genes = adata_act, all_ranked

# WT-only / KO-only / shared genes. Built from the ranked lists (not from raw
# set arithmetic) so the order follows the likelihood ranking and is identical
# from run to run.
wt_gene_set, ko_gene_set = set(wt_top_genes), set(ko_top_genes)
WT_uniq = [gene for gene in wt_top_genes if gene not in ko_gene_set]
KO_uniq = [gene for gene in ko_top_genes if gene not in wt_gene_set]
common = [gene for gene in wt_top_genes if gene in ko_gene_set]

#Create output df
# The columns have different lengths. Building the frame from a dict of Series
# takes the union of their indexes, so shorter columns are padded with NaN.
# Assigning the Series one by one into an empty DataFrame would instead fix the
# index to the length of the first column and silently cut off longer lists.
out_df = pd.DataFrame({
    'All_ranked': pd.Series(all_top_genes, dtype=object),
    'All_ranked_likelihood': pd.Series(all_top_genes_score, dtype=float),
    'WT_ranked': pd.Series(wt_top_genes, dtype=object),
    'WT_ranked_likelihood': pd.Series(wt_top_genes_score, dtype=float),
    'KO_ranked': pd.Series(ko_top_genes, dtype=object),
    'KO_ranked_likelihood': pd.Series(ko_top_genes_score, dtype=float),
    'WT_unique': pd.Series(WT_uniq, dtype=object),
    'KO_unique': pd.Series(KO_uniq, dtype=object),
    'WT-KO_common': pd.Series(common, dtype=object),
})

# Write under the working directory explicitly (not relative to the current
# directory) and make sure the folder exists, so this cell does not depend on
# os.chdir() or on the folder-creating cell having been run in this session.
likelihood_dir = Path(wk_dir) / "likelihood_genes_drivers"
likelihood_dir.mkdir(parents=True, exist_ok=True)
out_df.to_csv(likelihood_dir / "act_WT-KO_likelihood_genes.csv", index=False)