# Cell type enrichment analysis

In [1]:
import functools
import numpy as np
import pandas as pd
from scipy.stats import fisher_exact
from statsmodels.stats.multitest import fdrcorrection

In [2]:
# Input locations used by the loader functions below (paths are relative
# to the directory the notebook is run from).
config = dict(
    # Tab-separated cell type table read by get_celltype_genes().
    celltype='../../../../../input/celltypes/_m/Zeisel_single_cell.tsv',
    # Tab-separated differential expression results read by get_deg().
    deg_file='../../_m/genes/diffExpr_EAvsAA_full.txt',
)

In [3]:
@functools.lru_cache()
def get_celltype_genes():
    """Load the cell type table named by ``config['celltype']``.

    The file is parsed as tab-separated text and its first column becomes
    the index. The result is memoized, so the file is read only on the
    first call and every later call returns the same DataFrame object.
    """
    celltype_path = config['celltype']
    celltype_table = pd.read_csv(celltype_path, sep='\t', index_col=0)
    return celltype_table


@functools.lru_cache()
def get_deg():
    """Load the differential expression table named by ``config['deg_file']``.

    The file is parsed as tab-separated text and its first column becomes
    the index. The result is memoized, so the file is read only on the
    first call and every later call returns the same DataFrame object.
    """
    deg_path = config['deg_file']
    deg_table = pd.read_csv(deg_path, sep='\t', index_col=0)
    return deg_table


@functools.lru_cache()
def merge_dataframe(celltype):
    """Attach one cell type column to the differential expression table.

    Parameters
    ----------
    celltype : str
        Column label to select from the table returned by
        ``get_celltype_genes()``.

    Returns
    -------
    pandas.DataFrame
        Rows of ``get_deg()`` joined to the selected column by matching the
        ``Symbol`` column against the cell type table's index. The merge
        uses pandas' default join type, so rows without a match are not
        kept. The result is memoized per ``celltype``.
    """
    deg_table = get_deg()
    # Double brackets keep the selection as a one-column DataFrame so it
    # can be merged on its index.
    membership = get_celltype_genes()[[celltype]]
    return deg_table.merge(membership, left_on='Symbol', right_index=True)

In [4]:
def cal_fishers_direction(celltype, direction):
    """Run Fisher's exact test of DE status against a cell type column.

    Parameters
    ----------
    celltype : str
        Column label passed to ``merge_dataframe``; the column is compared
        against the values 1 and 0.
    direction : str
        ``'Up'`` keeps rows with ``t > 0``, ``'Down'`` keeps rows with
        ``t < 0``; any other value (e.g. ``'All'``) keeps every row.

    Returns
    -------
    The result of ``scipy.stats.fisher_exact`` on the 2x2 table
    ``[[sig & in, sig & out], [not sig & in, not sig & out]]``; it unpacks
    as ``(odds_ratio, p_value)``.
    """
    df = merge_dataframe(celltype)
    # Only filter for the two signed directions; the frame is never mutated
    # here, so no defensive copy of the cached merge result is needed.
    if direction == 'Up':
        df = df[df['t'] > 0]
    elif direction == 'Down':
        df = df[df['t'] < 0]

    adj_p = df['adj.P.Val']
    significant = adj_p < 0.05
    # Use >= so a gene with adj.P.Val exactly 0.05 lands in the
    # "not significant" row instead of being dropped from the table.
    # An explicit comparison (rather than ~significant) keeps rows with a
    # missing adj.P.Val out of both rows.
    not_significant = adj_p >= 0.05

    # Rows whose cell type value is neither 1 nor 0 are counted in neither
    # column.
    in_celltype = df[celltype] == 1
    not_in_celltype = df[celltype] == 0

    table = [
        [(significant & in_celltype).sum(),
         (significant & not_in_celltype).sum()],
        [(not_significant & in_celltype).sum(),
         (not_significant & not_in_celltype).sum()],
    ]
    return fisher_exact(table)

## Calculate enrichment

In [6]:
# Every column of the cell type table is tested once per direction.
unique_celltypes = get_celltype_genes().columns.unique()

# Enumerate (direction, cell type) pairs with direction as the outer loop,
# then run one Fisher's exact test per pair in that order.
test_pairs = [
    (direction_label, celltype_label)
    for direction_label in ['Up', 'Down', 'All']
    for celltype_label in unique_celltypes
]
fisher_results = [
    cal_fishers_direction(celltype_label, direction_label)
    for direction_label, celltype_label in test_pairs
]

# Parallel lists, one entry per test.
dir_lt = [direction_label for direction_label, _celltype in test_pairs]
ct_lt = [celltype_label for _direction, celltype_label in test_pairs]
oddratio_lt = [result[0] for result in fisher_results]
pval_lt = [result[1] for result in fisher_results]

dt = pd.DataFrame({
    'Cell_type': ct_lt,
    'OR': oddratio_lt,
    'PValue': pval_lt,
    'Direction': dir_lt,
})

# fdrcorrection is applied once across all tests (all directions together);
# its second return value is stored as the FDR column.
fdr = fdrcorrection(dt['PValue'])[1]
dt = dt.assign(FDR=fdr)

In [30]:
# Save the complete results table as tab-separated text without the index.
dt.to_csv('celltype_enrichment_analysis.txt', index=False, sep='\t')
# Display only the tests whose FDR is at or below 0.05.
dt.loc[dt['FDR'] <= 0.05]