In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc

from sys import path
from os.path import abspath
from os import makedirs
path.append(abspath("/home/ng136/nico"))
import ng_functions as ng

from glob import glob
from pathlib import Path
import warnings
from time import time
from datetime import datetime
from tqdm import tqdm
import gseapy
import json

# Load data and metadata

In [3]:
# Neutrophil state labels, in the order they are iterated over and exported below.
n_states = [
    'N1a',
    'N1b',
    'N2',
    'N3',
    'N4',
    'N5',
    'N6',
]

In [4]:
# Short state code -> descriptive neutrophil population name.
n_states_dict = {
    'N1a': 'Sell+Ngp+ neutrophils',
    'N1b': 'Sell+Lst1+ neutrophils',
    'N2': 'Sell+Cxcl10+ neutrophils',
    'N3': 'Cxcl3+ neutrophils',
    'N4': 'Siglecf+Xbp1+ neutrophils',
    'N5': 'Siglecf+Ccl3+ neutrophils',
    'N6': 'Siglecf+Ngp+ neutrophils',
}

# Descriptive population name -> hex colour; keys match the values of n_states_dict.
neu_palette_dict = {
    'Sell+Ngp+ neutrophils': '#6EA6CD',
    'Sell+Lst1+ neutrophils': '#4A7BB7',
    'Sell+Cxcl10+ neutrophils': '#364B9A',
    'Cxcl3+ neutrophils': '#EAECCC',
    'Siglecf+Xbp1+ neutrophils': '#A50026',
    'Siglecf+Ccl3+ neutrophils': '#DD3D2D',
    'Siglecf+Ngp+ neutrophils': '#F67E4B',
}

In [8]:
# Read the annotated neutrophil AnnData backup from disk.
ndata = sc.read(
    '/n/groups/klein/nico/neutrophils/backups/totalseq_exp2_neutrophils_untreated_annotated_embedding_5258x13126_backup_220422_15h46.h5ad'
)

# Keep only the cells whose `sample_condition` label starts with 'KP19';
# .copy() materialises the subset as its own object instead of a slice of ndata.
kp19_mask = ndata.obs['sample_condition'].str.startswith('KP19')
udata = ndata[kp19_mask].copy()

In [10]:
# Rank genes for every `smoothed_Zilionis` group with the Wilcoxon method,
# using the 'log1p' layer (not .raw). The result is stored under the
# 'wilcoxon_subsets' key and read back later via that same key.
sc.tl.rank_genes_groups(
    udata,
    groupby='smoothed_Zilionis',
    method='wilcoxon',
    use_raw=False,
    layer='log1p',
    key_added='wilcoxon_subsets',
)


# Generate matrix

In [11]:
def second_largest(df):
    """Return the second-largest value of a pandas Series.

    Despite the parameter name, this is used column-wise through
    ``DataFrame.apply``, so ``df`` is a single Series. Ties count as
    separate entries (``[2, 2, 1]`` gives ``2``), and a one-element
    Series returns that element.
    """
    top_two = df.nlargest(2)
    # The smaller of the two largest entries is the runner-up.
    return top_two.min()

In [17]:
# Per-state outputs, both keyed by state label:
#   fc_gene_list[state] -> column labels ordered by descending fold change in that state
#   dge_tables[state]   -> table of fold change plus Wilcoxon statistics
fc_gene_list = {}
dge_tables = {}

for state in n_states:
    # Wilcoxon results for this state, filtered to log2FC >= 0.25 and
    # p-value cutoff 0.01, ordered by descending log fold change.
    enrichments = (
        sc.get.rank_genes_groups_df(
            udata,
            group=state,
            key='wilcoxon_subsets',
            log2fc_min=0.25,
            pval_cutoff=0.01,
        )
        .sort_values('logfoldchanges', ascending=False)
    )

    # Mean of the 'cp10k' layer per (state, library), then averaged over
    # libraries so each library contributes equally to a state's mean.
    # NOTE(review): ng.groupby_aggregate is a project helper; the result is
    # presumably states x genes restricted to enrichments['names'] — confirm.
    per_library_exp = ng.groupby_aggregate(
        udata,
        ['smoothed_Zilionis', 'library_name'],
        f=np.mean,
        layer='cp10k',
        return_df=True,
        var_names=enrichments['names'],
    )
    marker_exp = per_library_exp.groupby(['smoothed_Zilionis']).mean()

    # log2 ratio (with +1 on both sides) of each state's mean to the
    # second-largest state mean of the same column.
    runner_up_exp = marker_exp.apply(second_largest)
    marker_logfc = np.log2((marker_exp + 1).divide(runner_up_exp + 1, axis=1))

    # Order the columns by this state's fold change, highest first.
    ranked_logfc = marker_logfc.sort_values(state, axis=1, ascending=False)

    # Attach the Wilcoxon statistics and give the columns their export names.
    export_names = {
        state: 'log2_fold_change_max_to_2nd_max',
        'scores': 'standardized_U_statistic_MWU',
        'pvals': 'p_value_MWU',
        'pvals_adj': 'FDR',
    }
    stats_by_gene = enrichments.set_index('names')
    state_table = (
        ranked_logfc.T
        .join(stats_by_gene)
        .loc[:, [state, 'scores', 'pvals', 'pvals_adj']]
        .rename(export_names, axis=1)
        .sort_values('log2_fold_change_max_to_2nd_max', ascending=False)
    )

    state_table['enriched_in'] = state
    # Flag the first 100 rows (the table is already sorted by fold change).
    state_table['shown_in_Fig2'] = [rank < 100 for rank in range(len(state_table))]

    dge_tables[state] = state_table
    fc_gene_list[state] = ranked_logfc.columns

# Export table

In [20]:
# Stack the per-state tables in n_states order, keeping only rows whose
# max-to-2nd-max log2 fold change is at least 0.1375.
supplementary_table = pd.concat(
    [
        dge_tables[state].loc[
            lambda table: table['log2_fold_change_max_to_2nd_max'] >= 0.1375
        ]
        for state in n_states
    ]
)

In [26]:
# Write the combined table to CSV in the working directory; the row index is
# written as the first column (pandas default, spelled out here).
supplementary_table.to_csv(
    path_or_buf='fig2b_untreated_supplementary_table.csv',
    index=True,
)