In [1]:
import pandas as pd
import os
from pathlib import Path
from os import listdir
from os.path import isdir, join

In [4]:
###----- Resolve working directories once per kernel session
# First execution: remember the current working directory and derive the
# base / tools directories from it. Later executions of this cell keep the
# values already defined and only report "Reload".
if "code_dir" in globals():
    print("Reload")
else:
    print("Start")
    code_dir = os.getcwd()
    # Base directory = working directory with the "/plotting_codes" part removed
    base_dir = code_dir.replace("/plotting_codes", "")
    tools_dir = base_dir + "/tools"

Reload


In [32]:
# Genome whose marker / cytokine gene lists are loaded by the next cell;
# that cell has branches for "hs" and "mm".
genome = "mm"

In [35]:
# Load the surface-marker gene list (`markers`) and the cytokine gene list
# (`cts`) for the selected genome from the SurfaceMarkers_Cytokines repository.
if genome == "hs":
    markers_file = 'https://raw.githubusercontent.com/Yolanda-HT/SurfaceMarkers_Cytokines/master/2_compiled/HS_MARKERS.csv'
    markers_df = pd.read_csv(markers_file)
    markers = markers_df['gene_name'].tolist()

    cts_file = 'https://raw.githubusercontent.com/Yolanda-HT/SurfaceMarkers_Cytokines/master/1_cytokine_source/Human_cytokines.csv'
    cts_df = pd.read_csv(cts_file)
    cts = cts_df['Gene Symbol'].tolist()
elif genome == "mm":
    markers_file = 'https://raw.githubusercontent.com/Yolanda-HT/SurfaceMarkers_Cytokines/master/2_compiled/MM_MARKERS.csv'
    markers_df = pd.read_csv(markers_file)
    markers = markers_df['gene_name'].tolist()

    cts_file = 'https://raw.githubusercontent.com/Yolanda-HT/SurfaceMarkers_Cytokines/master/1_cytokine_source/Mouse_cytokines.txt'
    # NOTE(review): with names=None the first line of the file is consumed as
    # the header row -- confirm the file has a header, otherwise the first
    # gene is dropped from `cts`.
    cts_df = pd.read_csv(cts_file, sep="\t", names=None)
    # Gene symbols are taken from the first column and de-duplicated
    cts = list(set(cts_df.iloc[:,0].tolist()))
else:
    # Fail loudly: without this branch an unsupported value leaves `markers`
    # and `cts` undefined, or silently stale from an earlier run of this cell.
    raise ValueError('Unsupported genome "%s": expected "hs" or "mm"' % genome)

In [117]:
def _read_sig_up_genes(de_file, log2fc_c, pval_c, sort_col=None):
    """Return the gene names significantly up-regulated in one DE table.

    Rows are kept when 't-test_overestim_var_padj' <= pval_c and
    't-test_overestim_var_logfc' > log2fc_c. When sort_col is given, the
    surviving rows are ordered by that column (highest first) before the
    'gene_names' column is returned as a list.
    """
    de_df = pd.read_csv(str(de_file))
    keep = ((de_df['t-test_overestim_var_padj'] <= pval_c)
            & (de_df['t-test_overestim_var_logfc'] > log2fc_c))
    sig_up = de_df[keep]
    if sort_col is not None:
        sig_up = sig_up.sort_values(sort_col, ascending=False)
    return sig_up['gene_names'].tolist()


def _keep_ranked(ranked_genes, wanted):
    """Return the entries of ranked_genes that are in the set wanted, order preserved."""
    return [gene for gene in ranked_genes if gene in wanted]


def _columns_to_frame(columns):
    """Build a DataFrame from {column name: gene list}, padding short columns with NaN."""
    # Building from a dict of Series takes the union of the indexes. Assigning
    # Series one by one to an empty frame would instead truncate every column
    # to the length of the first one.
    return pd.DataFrame({name: pd.Series(genes, dtype=object)
                         for name, genes in columns.items()})


def marker_cytokine_select(de_dir, use_key, label_file, log2fc_c, pval_c,
                           marker_genes=None, cytokine_genes=None):
    """Tabulate up-regulated surface-marker and cytokine genes for one DE run.

    All '*differential.csv' files below <de_dir>/<use_key> are read, genes
    passing the cutoffs are intersected with the marker and cytokine gene
    lists, and two tables are written next to the run directory:
    '<de_dir>/<use_key>_markers_p<pval_c>-log2fc<log2fc_c>.csv' and
    '<de_dir>/<use_key>_cytokines_p<pval_c>-log2fc<log2fc_c>.csv'.

    The layout is chosen from use_key (case-insensitive):
      * contains "each" and "all": one column per '<name>_differential.csv'
        file, genes listed in DE table order;
      * contains "per": files at <group>/<cond>/<cond>_vs_<other>_differential.csv,
        one column per group and condition, genes ranked by the column named
        after the condition;
      * otherwise: files at <i>/<i>_vs_<j>_differential.csv, a label x label
        table whose cells hold '|'-joined genes ranked by the column named <i>.

    Parameters
    ----------
    de_dir : str
        Directory containing the <use_key> sub-directory; outputs go here.
    use_key : str
        Name of the sub-directory holding the differential CSV files.
    label_file : str
        CSV with 'old_label' and 'new_label' columns used to rename labels;
        labels absent from 'old_label' are kept unchanged.
    log2fc_c : float
        Genes need 't-test_overestim_var_logfc' strictly above this value.
    pval_c : float
        Genes need 't-test_overestim_var_padj' at or below this value.
    marker_genes, cytokine_genes : iterable of str, optional
        Gene lists to intersect with. When omitted, the notebook-level
        `markers` and `cts` lists are used.

    Returns
    -------
    None
        The two CSV files are the only result.
    """
    if marker_genes is None:
        marker_genes = markers
    if cytokine_genes is None:
        cytokine_genes = cts
    marker_set = set(marker_genes)
    cytokine_set = set(cytokine_genes)

    input_dir = Path(de_dir) / use_key
    # Sorted so that the column order does not depend on directory listing order
    diff_files = sorted(input_dir.rglob("*differential.csv"))

    ###----- Read label conversion
    label_df = pd.read_csv(label_file)
    label_map = {}
    for old_label, new_label in zip(label_df['old_label'], label_df['new_label']):
        # First occurrence of an old label wins
        label_map.setdefault(str(old_label), str(new_label))

    key_lower = use_key.lower()

    #####---------- One versus all comparison
    if "each" in key_lower and "all" in key_lower:
        marker_cols = {}
        cytokine_cols = {}
        for de_file in diff_files:
            i_base = de_file.name.replace("_differential.csv", "")
            i_label = label_map.get(i_base, i_base)

            up_genes = _read_sig_up_genes(de_file, log2fc_c, pval_c)
            marker_cols[i_label] = _keep_ranked(up_genes, marker_set)
            # Cytokine column holds cytokine hits (it used to receive the marker hits)
            cytokine_cols[i_label] = _keep_ranked(up_genes, cytokine_set)

        markers_df = _columns_to_frame(marker_cols)
        cytokines_df = _columns_to_frame(cytokine_cols)

    #####---------- One versus Another comparison
    elif "per" in key_lower:
        groups = sorted({de_file.parent.parent.name for de_file in diff_files})
        conds = sorted({de_file.parent.name for de_file in diff_files})

        marker_cols = {}
        cytokine_cols = {}
        for group in groups:
            group_label = label_map.get(group, group)
            for cond in conds:
                # NOTE(review): takes the first other condition, so this layout
                # assumes exactly two conditions; a single condition raises
                # IndexError here.
                other_cond = [x for x in conds if x != cond][0]
                ij_file = input_dir / group / cond / ("%s_vs_%s_differential.csv" % (cond, other_cond))

                # Output is ranked by expression value in the target condition
                up_genes = _read_sig_up_genes(ij_file, log2fc_c, pval_c, sort_col=cond)

                ij_colname = '%s_%s-vs-%s' % (group_label, cond, other_cond)
                marker_cols[ij_colname] = _keep_ranked(up_genes, marker_set)
                cytokine_cols[ij_colname] = _keep_ranked(up_genes, cytokine_set)

        markers_df = _columns_to_frame(marker_cols)
        cytokines_df = _columns_to_frame(cytokine_cols)

    #####---------- One versus one comparison
    else:
        labels_uniq = sorted({de_file.parent.name for de_file in diff_files})
        # Each label is renamed on its own, so rows and columns always agree
        labels_uniq_new = [label_map.get(x, x) for x in labels_uniq]

        marker_cols = {}
        cytokine_cols = {}
        for i, i_newlabel in zip(labels_uniq, labels_uniq_new):
            i_markers_list = []
            i_cytokines_list = []
            for j in labels_uniq:
                if j == i:
                    # Diagonal: a label is not compared with itself
                    i_markers_list.append("")
                    i_cytokines_list.append("")
                    continue
                ij_file = input_dir / i / ("%s_vs_%s_differential.csv" % (i, j))

                # Output is ranked by expression value in target cluster
                up_genes = _read_sig_up_genes(ij_file, log2fc_c, pval_c, sort_col=i)
                i_markers_list.append("|".join(_keep_ranked(up_genes, marker_set)))
                i_cytokines_list.append("|".join(_keep_ranked(up_genes, cytokine_set)))
            marker_cols[i_newlabel] = i_markers_list
            cytokine_cols[i_newlabel] = i_cytokines_list

        markers_df = pd.DataFrame(marker_cols, index=labels_uniq_new)
        cytokines_df = pd.DataFrame(cytokine_cols, index=labels_uniq_new)

    markers_df.to_csv("%s/%s_markers_p%s-log2fc%s.csv"%(de_dir, use_key, pval_c, log2fc_c))
    cytokines_df.to_csv("%s/%s_cytokines_p%s-log2fc%s.csv"%(de_dir, use_key, pval_c, log2fc_c))

In [128]:
# DE output directory and old -> new label table for this dataset
de__dir = "/media/pipkin/ROCKET-PRO/CD8_DEV_SC/0_Acute-Chronic/1_Scanpy/0_Scanpy_out_resampled/2_DE"
label__file = '/media/pipkin/ROCKET-PRO/CD8_DEV_SC/6_Harmony/0_Combined/Exp391_newLabels.csv'

# Each immediate sub-directory of the DE directory is processed as one key
sub_dirs = [entry for entry in listdir(de__dir) if isdir(join(de__dir, entry))]
for use__key in sub_dirs:
    # Cutoffs: log2 fold change > 2, padj <= 0.01
    marker_cytokine_select(de__dir, use__key, label__file, log2fc_c=2, pval_c=0.01)



In [129]:
# DE output directory and old -> new label table for this dataset
de__dir = '/media/pipkin/ROCKET-PRO/CD8_DEV_SC/0_Acute-Chronic/1_Scanpy/1_Scanpy_out_resampled_ARM-NAV/2_DE'
label__file = '/media/pipkin/ROCKET-PRO/CD8_DEV_SC/6_Harmony/0_Combined/Exp391_newLabels.csv'

# Each immediate sub-directory of the DE directory is processed as one key
sub_dirs = [entry for entry in listdir(de__dir) if isdir(join(de__dir, entry))]
for use__key in sub_dirs:
    # Cutoffs: log2 fold change > 2, padj <= 0.01
    marker_cytokine_select(de__dir, use__key, label__file, log2fc_c=2, pval_c=0.01)



In [130]:
# DE output directory and old -> new label table for this dataset
de__dir = '/media/pipkin/ROCKET-PRO/CD8_DEV_SC/0_Acute-Chronic/1_Scanpy/1_Scanpy_out_resampled_Cl13-NAV/2_DE'
label__file = '/media/pipkin/ROCKET-PRO/CD8_DEV_SC/6_Harmony/0_Combined/Exp391_newLabels.csv'

# Each immediate sub-directory of the DE directory is processed as one key
sub_dirs = [entry for entry in listdir(de__dir) if isdir(join(de__dir, entry))]
for use__key in sub_dirs:
    # Cutoffs: log2 fold change > 2, padj <= 0.01
    marker_cytokine_select(de__dir, use__key, label__file, log2fc_c=2, pval_c=0.01)



In [131]:
# DE output directory and old -> new label table for this dataset
de__dir = '/media/pipkin/ROCKET-PRO/CD8_DEV_SC/1_IL2RAKO/1_Scanpy/0-all_Scanpy_out/2_DE'
label__file = '/media/pipkin/ROCKET-PRO/CD8_DEV_SC/6_Harmony/0_Combined/Exp334_newLabels.csv'

# Each immediate sub-directory of the DE directory is processed as one key
sub_dirs = [entry for entry in listdir(de__dir) if isdir(join(de__dir, entry))]
for use__key in sub_dirs:
    # Cutoffs: log2 fold change > 2, padj <= 0.01
    marker_cytokine_select(de__dir, use__key, label__file, log2fc_c=2, pval_c=0.01)

