# XCI status enrichment analysis

In [None]:
import functools
import numpy as np
import pandas as pd
from scipy.stats import fisher_exact
from statsmodels.stats.multitest import multipletests

In [None]:
# Input locations used by the loader functions in this notebook.
config = dict(
    # Annotated table of shared differentially expressed genes.
    shared_file='../../../../differential_expression/tissue_comparison/upset_plots/_m/BrainSeq_shared_degs_annotation.txt',
    # Per-gene XCI status table.
    xci_file='../../_h/xci_status_hg19.txt',
    # Background gene table used as the test universe.
    background="background.txt",
)

In [None]:
@functools.lru_cache()
def get_deg():
    """Read the tab-separated table at ``config['shared_file']``.

    The result is memoized, so every call returns the same DataFrame
    object; treat it as read-only.
    """
    deg_path = config['shared_file']
    return pd.read_csv(deg_path, sep='\t')


@functools.lru_cache()
def get_xci():
    """Read the tab-separated table at ``config['xci_file']``.

    The result is memoized, so every call returns the same DataFrame
    object; treat it as read-only.
    """
    xci_path = config['xci_file']
    return pd.read_csv(xci_path, sep='\t')


@functools.lru_cache()
def get_background():
    """Read the tab-separated table at ``config['background']``.

    The result is memoized, so every call returns the same DataFrame
    object; treat it as read-only.
    """
    background_path = config['background']
    return pd.read_csv(background_path, sep='\t')

In [None]:
def _xci_fisher_test(deg, status):
    """Fisher's exact test of DEG membership versus one XCI status.

    Shared implementation behind ``cal_fishers`` and
    ``cal_fishers_direction``.

    Parameters
    ----------
    deg : pandas.DataFrame
        DEG table to test; it is left-merged onto the background genes by
        ``ensemblID`` and its ``Dir`` column marks a gene as a DEG
        (non-zero) or not (zero / missing after the merge).
    status : str
        Value of the ``Combined XCI status`` column to test.

    Returns
    -------
    The result of ``scipy.stats.fisher_exact`` on the 2x2 table
    ``[[DEG & status, DEG & other], [non-DEG & status, non-DEG & other]]``
    (unpackable as ``odds_ratio, p_value``).
    """
    xci = get_xci()
    # assign() builds a new frame, so the DataFrame cached by get_xci() is
    # never modified in place. The regex strips the ".<version>" suffix
    # from the gene ID so it can be joined on ensemblID.
    xci = xci.assign(
        ensemblID=xci['Gene ID'].str.replace(r"\..*", "", regex=True)
    )
    df = (
        get_background()
        .drop_duplicates()
        .merge(deg, on="ensemblID", how="left")
        .drop(['gene_id', 'gene_name'], axis=1)
        .merge(xci, on='ensemblID', how="left")
    )
    # Background genes absent from `deg` have no Dir after the left merge;
    # they count as non-DEGs.
    is_deg = df['Dir'].fillna(0) != 0
    has_status = df['Combined XCI status'] == status
    # Genes without an XCI annotation (missing after the left merge) fall
    # into the "other" column because a missing value is != status.
    other_status = df['Combined XCI status'] != status
    # Plain ints keep the printed table free of NumPy scalar wrappers.
    table = [
        [int((is_deg & has_status).sum()), int((is_deg & other_status).sum())],
        [int((~is_deg & has_status).sum()), int((~is_deg & other_status).sum())],
    ]
    print(table)
    return fisher_exact(table)


def cal_fishers(status):
    """Test enrichment of all DEGs (either direction) for one XCI status.

    Prints the 2x2 contingency table and returns the
    ``scipy.stats.fisher_exact`` result (``odds_ratio, p_value``).
    """
    return _xci_fisher_test(get_deg(), status)


def cal_fishers_direction(status, direction):
    """Test enrichment of direction-specific DEGs for one XCI status.

    Parameters
    ----------
    status : str
        Value of the ``Combined XCI status`` column to test.
    direction : str
        ``'Up'`` keeps DEGs with ``Dir > 0``; any other value keeps DEGs
        with ``Dir < 0``.

    Prints the 2x2 contingency table and returns the
    ``scipy.stats.fisher_exact`` result (``odds_ratio, p_value``).
    """
    deg = get_deg()
    if direction == 'Up':
        deg = deg[deg['Dir'] > 0]
    else:
        deg = deg[deg['Dir'] < 0]
    return _xci_fisher_test(deg, status)


def cal_fisher_by_xci_status():
    """Run the Fisher's exact tests for every XCI status and DEG direction.

    For each unique value of ``Combined XCI status`` the test is run three
    times: on all DEGs (``cal_fishers``), on ``'Up'`` DEGs and on
    ``'Down'`` DEGs (``cal_fishers_direction``). A message is printed for
    every test with an unadjusted p-value below 0.05.

    Returns
    -------
    pandas.DataFrame
        One row per test with columns ``XCI status``, ``OR``, ``PValue``
        and ``Direction`` (``'All'``, ``'Male Bias'`` for ``'Up'``,
        ``'Female Bias'`` for ``'Down'``). Rows are ordered by direction
        first, then by XCI status.
    """
    xci_status = get_xci().loc[:, 'Combined XCI status'].unique()
    # (Direction label, direction passed to cal_fishers_direction or None
    # for the all-DEG test, message template for significant results).
    analyses = [
        ('All', None,
         "There is a significant enrichment (p-value < %.1e) of %s!"),
        ('Male Bias', 'Up',
         "There is a significant enrichment of male bias genes (p-value < %.1e) of %s!"),
        ('Female Bias', 'Down',
         "There is a significant enrichment of female bias genes (p-value < %.1e) of %s!"),
    ]
    rows = []
    for label, direction, message in analyses:
        for status in xci_status:
            if direction is None:
                odd_ratio, pval = cal_fishers(status)
            else:
                odd_ratio, pval = cal_fishers_direction(status, direction)
            rows.append({'XCI status': status, 'OR': odd_ratio,
                         'PValue': pval, 'Direction': label})
            if pval < 0.05:
                print(message % (pval, status))
    return pd.DataFrame(rows,
                        columns=['XCI status', 'OR', 'PValue', 'Direction'])

## Calculate Fisher's exact test for each XCI status

In [None]:
# Run every enrichment test, adjust all p-values together with the
# 'fdr_bh' method, and show the rows that pass FDR <= 0.05.
df = cal_fisher_by_xci_status()
fdr = multipletests(df.PValue, method='fdr_bh')[1]
df['FDR'] = fdr
df.loc[df['FDR'] <= 0.05]

In [None]:
# Display the full results table, including rows that did not pass the FDR filter.
df

In [None]:
# Save the complete results table as tab-separated text without the index.
df.to_csv(
    'xci_enrichment_analysis.txt',
    sep='\t',
    index=False,
)