# Examine genes that are discordant across brain regions

In [None]:
import functools
import numpy as np
import pandas as pd

In [None]:
# Short region label -> path of that region's tab-separated
# differential-expression table (the file loaded by get_deg).
_DEG_TABLE = '../../../../%s/_m/genes/diffExpr_EAvsAA_full.txt'
config = {
    label: _DEG_TABLE % folder
    for label, folder in [
        ('caudate', 'caudate'),
        ('dlpfc', 'dlpfc'),
        ('hippo', 'hippocampus'),
        ('gyrus', 'dentateGyrus'),
    ]
}

In [None]:
@functools.lru_cache()
def get_deg(tissue):
    """Load the differential-expression table for one region.

    Reads the tab-separated file at ``config[tissue]`` (first column used as
    the index), copies the index into a ``Feature`` column, and adds ``Dir``
    as the sign of the ``t`` column.  An ``ensemblID`` column is derived from
    ``gene_id`` (everything from the first dot onward removed) or, failing
    that, obtained by renaming ``ensembl_gene_id``.

    The result is memoised by ``lru_cache``: repeated calls with the same
    ``tissue`` return the same DataFrame object, so callers should not
    modify it in place.

    Returns a DataFrame with the columns ``Feature``, ``ensemblID``,
    ``Symbol``, ``adj.P.Val``, ``logFC``, ``t`` and ``Dir``.
    """
    table = pd.read_csv(config[tissue], sep='\t', index_col=0)
    table['Feature'] = table.index
    table['Dir'] = np.sign(table['t'])

    available = table.columns
    if 'gene_id' in available:
        # Drop the ".<suffix>" part of the identifier.
        table['ensemblID'] = table['gene_id'].str.replace('\\..*', '', regex=True)
    elif 'ensembl_gene_id' in available:
        table = table.rename(columns={'ensembl_gene_id': 'ensemblID'})

    wanted = ['Feature', 'ensemblID', 'Symbol', 'adj.P.Val', 'logFC', 't', 'Dir']
    return table[wanted]


@functools.lru_cache()
def get_deg_sig(tissue, fdr):
    """Return the rows of ``get_deg(tissue)`` with ``adj.P.Val`` strictly below ``fdr``.

    Memoised per ``(tissue, fdr)`` pair by ``lru_cache``.
    """
    table = get_deg(tissue)
    passes_cutoff = table['adj.P.Val'] < fdr
    return table.loc[passes_cutoff]


@functools.lru_cache()
def merge_dataframes(tissue1, tissue2):
    """Join the full tables of two regions on ``Feature``.

    Only features present in both tables are kept (pandas' default inner
    join).  Columns shared by the two tables are disambiguated with the
    suffixes ``_<tissue1>`` and ``_<tissue2>``.
    """
    left = get_deg(tissue1)
    right = get_deg(tissue2)
    labels = ['_%s' % tissue1, '_%s' % tissue2]
    return pd.merge(left, right, on='Feature', suffixes=labels)


@functools.lru_cache()
def merge_dataframes_sig(tissue1, tissue2, fdr=0.05):
    """Join the significant rows of two regions on ``Feature``.

    Each region's table is first restricted with ``get_deg_sig`` to rows
    whose ``adj.P.Val`` is below ``fdr``; the two filtered tables are then
    inner-joined on ``Feature``.  Columns shared by the two tables get the
    suffixes ``_<tissue1>`` and ``_<tissue2>``.

    Parameters
    ----------
    tissue1, tissue2 : str
        Keys of ``config`` naming the two regions to compare.
    fdr : float, optional
        Adjusted p-value cutoff applied to both regions.  Defaults to 0.05,
        the value previously hard-coded for every region (the old per-region
        conditional selected 0.05 on both branches).

    Returns
    -------
    pandas.DataFrame
        Features significant in both regions, with suffixed columns.
    """
    left = get_deg_sig(tissue1, fdr)
    right = get_deg_sig(tissue2, fdr)
    return left.merge(right, on='Feature',
                      suffixes=['_%s' % tissue1, '_%s' % tissue2])


In [None]:
def extract_disconcordant(tissue1, tissue2):
    """Return features significant in both regions but with opposite ``Dir``.

    Starts from ``merge_dataframes_sig(tissue1, tissue2)`` and keeps rows
    where one region's ``Dir`` is 1 and the other's is -1.  The surviving
    ``Feature`` / ``Dir_<tissue1>`` / ``Dir_<tissue2>`` columns are then
    joined with ``get_deg(tissue1)`` on ``Feature``; the unsuffixed ``Dir``
    column coming from that table is dropped.
    """
    dir1 = 'Dir_%s' % tissue1
    dir2 = 'Dir_%s' % tissue2
    both_sig = merge_dataframes_sig(tissue1, tissue2)

    up_then_down = (both_sig[dir1] == 1) & (both_sig[dir2] == -1)
    down_then_up = (both_sig[dir1] == -1) & (both_sig[dir2] == 1)
    flipped = both_sig[up_then_down | down_then_up]

    annotated = flipped[['Feature', dir1, dir2]].merge(get_deg(tissue1), on='Feature')
    return annotated.drop(columns='Dir')

## BrainSeq Tissue Comparison

In [None]:
# Caudate vs DLPFC: write the opposite-direction features to CSV, then display them.
pair = ('caudate', 'dlpfc')
cd = extract_disconcordant(*pair)
cd.to_csv("disconcordant_genes_%s_%s.csv" % pair, index=False)
print(cd.shape)
cd

In [None]:
# Caudate vs hippocampus: write the opposite-direction features to CSV, then display them.
pair = ('caudate', 'hippo')
ch = extract_disconcordant(*pair)
ch.to_csv("disconcordant_genes_%s_%s.csv" % pair, index=False)
print(ch.shape)
ch

In [None]:
# Ensembl IDs that appear in both caudate comparisons (vs hippocampus and vs DLPFC).
overlap = set(ch.ensemblID) & set(cd.ensemblID)
print("There are %d genes where caudate is different from both DLPFC and hippocampus!\n" %
      len(overlap))

cd[cd['ensemblID'].isin(overlap)]

In [None]:
# DLPFC vs hippocampus: write the opposite-direction features to CSV, then display them.
pair = ('dlpfc', 'hippo')
dh = extract_disconcordant(*pair)
dh.to_csv("disconcordant_genes_%s_%s.csv" % pair, index=False)
print(dh.shape)
dh

In [None]:
# Ensembl IDs that appear in both hippocampus comparisons (vs DLPFC and vs caudate).
overlap = set(dh.ensemblID) & set(ch.ensemblID)
print("There are %d genes where hippocampus is different from both caudate and DLPFC!\n" %
      len(overlap))

dh[dh['ensemblID'].isin(overlap)]

In [None]:
# Ensembl IDs that appear in both DLPFC comparisons (vs hippocampus and vs caudate).
overlap = set(dh.ensemblID) & set(cd.ensemblID)
print("There are %d genes where DLPFC is different from both caudate and hippocampus!\n" %
      len(overlap))

In [None]:
# Caudate vs dentate gyrus: write the opposite-direction features to CSV, then display them.
pair = ('caudate', 'gyrus')
cg = extract_disconcordant(*pair)
cg.to_csv("disconcordant_genes_%s_%s.csv" % pair, index=False)
print(cg.shape)
cg

In [None]:
# DLPFC vs dentate gyrus: write the opposite-direction features to CSV, then display them.
pair = ('dlpfc', 'gyrus')
dg = extract_disconcordant(*pair)
dg.to_csv("disconcordant_genes_%s_%s.csv" % pair, index=False)
print(dg.shape)
dg

In [None]:
# Ensembl IDs that appear in both the caudate-vs-gyrus and DLPFC-vs-gyrus tables.
overlap = set(cg.ensemblID) & set(dg.ensemblID)
print("There are %d genes where dentate gyrus is different from both caudate and DLPFC!\n" %
      len(overlap))

cg[cg['ensemblID'].isin(overlap)]

In [None]:
# Hippocampus vs dentate gyrus: write the opposite-direction features to CSV, then display them.
pair = ('hippo', 'gyrus')
hg = extract_disconcordant(*pair)
hg.to_csv("disconcordant_genes_%s_%s.csv" % pair, index=False)
print(hg.shape)
hg

In [None]:
# Ensembl IDs that appear in both the hippocampus-vs-gyrus and DLPFC-vs-gyrus tables.
overlap = set(hg.ensemblID) & set(dg.ensemblID)
print("There are %d genes where dentate gyrus is different from both DLPFC and hippocampus!\n" %
      len(overlap))

hg[hg['ensemblID'].isin(overlap)]

In [None]:
# Ensembl IDs that appear in all three dentate-gyrus comparison tables.
overlap = set(cg.ensemblID) & set(dg.ensemblID) & set(hg.ensemblID)
print("There are %d genes where dentate gyrus is different from both caudate, DLPFC, and hippocampus!\n" %
      len(overlap))

cg[cg['ensemblID'].isin(overlap)]