# Examine genes that are discordant across brain regions

In [1]:
import functools
import numpy as np
import pandas as pd

In [2]:
# Location of the per-region differential-expression result table
# (the "diffExpr_EAvsAA_full.txt" file), keyed by the short region label
# that the helper functions below take as their `tissue` argument.
config = dict(
    caudate='../../../../caudate/_m/genes/diffExpr_EAvsAA_full.txt',
    dlpfc='../../../../dlpfc/_m/genes/diffExpr_EAvsAA_full.txt',
    hippo='../../../../hippocampus/_m/genes/diffExpr_EAvsAA_full.txt',
    gyrus='../../../../dentateGyrus/_m/genes/diffExpr_EAvsAA_full.txt',
)

In [3]:
@functools.lru_cache()
def get_deg(tissue):
    """Load the differential-expression table for one brain region.

    The tab-separated file registered under ``config[tissue]`` is read with
    its first column as the index. Three columns are derived:

    * ``Feature``   - copy of the index (the row identifier).
    * ``Dir``       - sign of the ``t`` column (-1, 0 or 1).
    * ``ensemblID`` - ``gene_id`` with everything from the first ``.``
      onwards removed, or the ``ensembl_gene_id`` column renamed when
      ``gene_id`` is absent.

    Parameters
    ----------
    tissue : str
        Key of ``config`` selecting which result file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``Feature, ensemblID, Symbol, adj.P.Val, logFC, t, Dir``.

    Raises
    ------
    KeyError
        If ``tissue`` is not in ``config`` or a required column is missing.

    Notes
    -----
    Results are memoised with ``functools.lru_cache``, so every caller gets
    the same DataFrame object back; do not modify it in place.
    """
    dft = pd.read_csv(config[tissue], sep='\t', index_col=0)
    dft['Feature'] = dft.index
    dft['Dir'] = np.sign(dft['t'])
    if 'gene_id' in dft.columns:
        # regex=True must be explicit: from pandas 2.0 on, Series.str.replace
        # treats the pattern as a literal string by default, which would leave
        # the ".<version>" suffix in place without any error.
        dft['ensemblID'] = dft['gene_id'].str.replace(r'\..*', '', regex=True)
    elif 'ensembl_gene_id' in dft.columns:
        dft = dft.rename(columns={'ensembl_gene_id': 'ensemblID'})
    elif 'ensemblID' not in dft.columns:
        raise KeyError(
            "No 'gene_id', 'ensembl_gene_id' or 'ensemblID' column in %s"
            % config[tissue]
        )
    return dft[['Feature', 'ensemblID', 'Symbol', 'adj.P.Val', 'logFC', 't', 'Dir']]


@functools.lru_cache()
def get_deg_sig(tissue, fdr):
    """Return the rows of ``get_deg(tissue)`` whose ``adj.P.Val`` is below ``fdr``.

    The comparison is strict (``<``). Results are memoised per
    ``(tissue, fdr)`` pair by ``functools.lru_cache``.
    """
    table = get_deg(tissue)
    passes_fdr = table['adj.P.Val'] < fdr
    return table[passes_fdr]


@functools.lru_cache()
def merge_dataframes(tissue1, tissue2):
    """Join the full tables of two regions on ``Feature``.

    Uses the default (inner) merge, so only features present in both tables
    are kept. Columns other than ``Feature`` that occur in both tables get
    the suffixes ``_<tissue1>`` and ``_<tissue2>``.
    """
    left = get_deg(tissue1)
    right = get_deg(tissue2)
    labels = ['_%s' % tissue1, '_%s' % tissue2]
    return left.merge(right, on='Feature', suffixes=labels)


@functools.lru_cache()
def merge_dataframes_sig(tissue1, tissue2, fdr=0.05):
    """Join the significant genes of two regions on ``Feature``.

    Each region's table is first filtered with ``get_deg_sig`` and the two
    results are inner-merged, so only features significant in both regions
    remain. Columns other than ``Feature`` get the suffixes ``_<tissue1>``
    and ``_<tissue2>``.

    Parameters
    ----------
    tissue1, tissue2 : str
        Region labels understood by ``get_deg_sig``.
    fdr : float, optional
        Threshold applied to both regions. Defaults to 0.05, the value that
        was previously hard-coded (the old per-region conditional selected
        0.05 in both of its branches, so every region already used it).

    Returns
    -------
    pandas.DataFrame
        Merged table of features significant in both regions.
    """
    return get_deg_sig(tissue1, fdr).merge(
        get_deg_sig(tissue2, fdr),
        on='Feature',
        suffixes=['_%s' % tissue1, '_%s' % tissue2],
    )


In [4]:
def extract_disconcordant(tissue1, tissue2):
    """Return genes significant in both regions with opposite ``Dir`` signs.

    Starting from ``merge_dataframes_sig(tissue1, tissue2)``, keep rows where
    one region has ``Dir == 1`` and the other ``Dir == -1``. The surviving
    ``Feature`` / direction columns are then merged with ``get_deg(tissue1)``
    on ``Feature``, and that table's own ``Dir`` column is dropped, so the
    remaining statistics columns come from ``tissue1``.
    """
    dir1 = 'Dir_%s' % tissue1
    dir2 = 'Dir_%s' % tissue2
    shared = merge_dataframes_sig(tissue1, tissue2)

    up_then_down = shared[dir1].eq(1) & shared[dir2].eq(-1)
    down_then_up = shared[dir1].eq(-1) & shared[dir2].eq(1)
    flipped = shared.loc[up_then_down | down_then_up, ['Feature', dir1, dir2]]

    annotated = flipped.merge(get_deg(tissue1), on='Feature')
    return annotated.drop(columns='Dir')

## BrainSeq Tissue Comparison

In [5]:
# Caudate vs DLPFC: write the opposite-direction genes to CSV, then show them.
cd = extract_disconcordant(tissue1='caudate', tissue2='dlpfc')
cd.to_csv(
    "disconcordant_genes_%s_%s.csv" % ('caudate', 'dlpfc'),
    index=False,
)
print(cd.shape)
cd

(51, 8)

In [6]:
# Caudate vs hippocampus: write the opposite-direction genes to CSV, then show them.
ch = extract_disconcordant(tissue1='caudate', tissue2='hippo')
ch.to_csv(
    "disconcordant_genes_%s_%s.csv" % ('caudate', 'hippo'),
    index=False,
)
print(ch.shape)
ch

(14, 8)

In [7]:
# Genes that flip direction in caudate against DLPFC and against hippocampus:
# the overlap of the two caudate-based comparisons.
caudate_vs_both = set(ch.ensemblID) & set(cd.ensemblID)
print("There are %d genes where caudate is different from both DLPFC and hippocampus!\n" %
      len(caudate_vs_both))

cd[cd['ensemblID'].isin(list(caudate_vs_both))]

There are 6 genes where caudate is different from both DLPFC and hippocampus!



Unnamed: 0,Feature,Dir_caudate,Dir_dlpfc,ensemblID,Symbol,adj.P.Val,logFC,t
3,ENSG00000277883.1,1.0,-1.0,ENSG00000277883,NLRP3P1,4e-06,0.216787,5.41022
13,ENSG00000179889.18,-1.0,1.0,ENSG00000179889,PDXDC1,0.004927,-0.040339,-3.476557
31,ENSG00000143891.16,1.0,-1.0,ENSG00000143891,GALM,0.019087,0.094185,2.97199
33,ENSG00000143797.11,-1.0,1.0,ENSG00000143797,MBOAT2,0.020665,-0.042212,-2.938823
38,ENSG00000100302.6,1.0,-1.0,ENSG00000100302,RASD2,0.027692,0.090634,2.817245
41,ENSG00000065802.11,-1.0,1.0,ENSG00000065802,ASB1,0.034073,-0.033486,-2.72543


In [8]:
# DLPFC vs hippocampus: write the opposite-direction genes to CSV, then show them.
dh = extract_disconcordant(tissue1='dlpfc', tissue2='hippo')
dh.to_csv(
    "disconcordant_genes_%s_%s.csv" % ('dlpfc', 'hippo'),
    index=False,
)
print(dh.shape)
dh

In [10]:
# Genes that flip direction in hippocampus against DLPFC and against caudate:
# the overlap of the two comparisons that involve hippocampus.
hippo_vs_both = set(dh.ensemblID) & set(ch.ensemblID)
print("There are %d genes where hippocampus is different from both caudate and DLPFC!\n" %
      len(hippo_vs_both))

dh[dh['ensemblID'].isin(list(hippo_vs_both))]

There are 1 genes where hippocampus is different from both caudate and DLPFC!



Unnamed: 0,Feature,Dir_dlpfc,Dir_hippo,ensemblID,Symbol,adj.P.Val,logFC,t
4,ENSG00000138207.13,1.0,-1.0,ENSG00000138207,RBP4,0.048067,0.069842,2.614497


In [11]:
# Genes that flip direction in DLPFC against hippocampus and against caudate:
# the overlap of the two comparisons that involve DLPFC.
dlpfc_vs_both = set(dh.ensemblID) & set(cd.ensemblID)
print("There are %d genes where DLPFC is different from both caudate and hippocampus!\n" %
      len(dlpfc_vs_both))

There are 0 genes where DLPFC is different from both caudate and hippocampus!



In [12]:
# Caudate vs dentate gyrus: write the opposite-direction genes to CSV, then show them.
cg = extract_disconcordant(tissue1='caudate', tissue2='gyrus')
cg.to_csv(
    "disconcordant_genes_%s_%s.csv" % ('caudate', 'gyrus'),
    index=False,
)
print(cg.shape)
cg

(92, 8)

In [13]:
# DLPFC vs dentate gyrus: write the opposite-direction genes to CSV, then show them.
dg = extract_disconcordant(tissue1='dlpfc', tissue2='gyrus')
dg.to_csv(
    "disconcordant_genes_%s_%s.csv" % ('dlpfc', 'gyrus'),
    index=False,
)
print(dg.shape)
dg

(39, 8)

In [14]:
# Genes that flip direction in dentate gyrus against caudate and against DLPFC:
# the overlap of the caudate-gyrus and DLPFC-gyrus comparisons.
gyrus_vs_caudate_dlpfc = set(cg.ensemblID) & set(dg.ensemblID)
print("There are %d genes where dentate gyrus is different from both caudate and DLPFC!\n" %
      len(gyrus_vs_caudate_dlpfc))

cg[cg['ensemblID'].isin(list(gyrus_vs_caudate_dlpfc))]

There are 10 genes where dentate gyrus is different from both caudate and DLPFC!



Unnamed: 0,Feature,Dir_caudate,Dir_gyrus,ensemblID,Symbol,adj.P.Val,logFC,t
1,ENSG00000136235.15,1.0,-1.0,ENSG00000136235,GPNMB,1.324555e-10,0.684306,7.343311
2,ENSG00000106665.15,-1.0,1.0,ENSG00000106665,CLIP2,3.908762e-09,-0.112822,-6.752431
17,ENSG00000156076.9,1.0,-1.0,ENSG00000156076,WIF1,0.0001952671,0.272261,4.448384
21,ENSG00000181240.13,-1.0,1.0,ENSG00000181240,SLC25A41,0.0003224138,-0.372818,-4.31197
37,ENSG00000203797.9,-1.0,1.0,ENSG00000203797,DDO,0.005034072,-0.094586,-3.467339
41,ENSG00000171889.3,-1.0,1.0,ENSG00000171889,MIR31HG,0.005740283,-0.16243,-3.421673
44,ENSG00000174460.3,1.0,-1.0,ENSG00000174460,ZCCHC12,0.006950915,0.151884,3.351824
56,ENSG00000204624.7,-1.0,1.0,ENSG00000204624,DISP3,0.01539536,-0.094945,-3.05984
59,ENSG00000053371.12,1.0,-1.0,ENSG00000053371,AKR7A2,0.01742177,0.059218,3.01196
91,ENSG00000187122.16,-1.0,1.0,ENSG00000187122,SLIT1,0.04881015,-0.095963,-2.556815


In [15]:
# Hippocampus vs dentate gyrus: write the opposite-direction genes to CSV, then show them.
hg = extract_disconcordant(tissue1='hippo', tissue2='gyrus')
hg.to_csv(
    "disconcordant_genes_%s_%s.csv" % ('hippo', 'gyrus'),
    index=False,
)
print(hg.shape)
hg

In [17]:
# Genes that flip direction in dentate gyrus against hippocampus and against
# DLPFC: the overlap of the hippocampus-gyrus and DLPFC-gyrus comparisons.
gyrus_vs_dlpfc_hippo = set(hg.ensemblID) & set(dg.ensemblID)
print("There are %d genes where dentate gyrus is different from both DLPFC and hippocampus!\n" %
      len(gyrus_vs_dlpfc_hippo))

hg[hg['ensemblID'].isin(list(gyrus_vs_dlpfc_hippo))]

There are 5 genes where dentate gyrus is different from both DLPFC and hippocampus!



Unnamed: 0,Feature,Dir_hippo,Dir_gyrus,ensemblID,Symbol,adj.P.Val,logFC,t
0,ENSG00000136235.15,1.0,-1.0,ENSG00000136235,GPNMB,4.364453e-07,0.48724,5.946357
1,ENSG00000127884.4,1.0,-1.0,ENSG00000127884,ECHS1,2.76183e-05,0.073086,5.023604
2,ENSG00000156076.9,1.0,-1.0,ENSG00000156076,WIF1,0.001037661,0.272336,4.056085
7,ENSG00000170776.20,1.0,-1.0,ENSG00000170776,AKAP13,0.01097197,0.069187,3.265853
9,ENSG00000053371.12,1.0,-1.0,ENSG00000053371,AKR7A2,0.01237529,0.056287,3.220799


In [21]:
# Genes that flip direction in dentate gyrus against every other region:
# the overlap of all three gyrus-based comparisons.
gyrus_vs_all = set(cg.ensemblID) & set(dg.ensemblID) & set(hg.ensemblID)
print("There are %d genes where dentate gyrus is different from both caudate, DLPFC, and hippocampus!\n" %
      len(gyrus_vs_all))

cg[cg['ensemblID'].isin(list(gyrus_vs_all))]

There are 3 genes where dentate gyrus is different from both caudate, DLPFC, and hippocampus!



Unnamed: 0,Feature,Dir_caudate,Dir_gyrus,ensemblID,Symbol,adj.P.Val,logFC,t
1,ENSG00000136235.15,1.0,-1.0,ENSG00000136235,GPNMB,1.324555e-10,0.684306,7.343311
17,ENSG00000156076.9,1.0,-1.0,ENSG00000156076,WIF1,0.0001952671,0.272261,4.448384
59,ENSG00000053371.12,1.0,-1.0,ENSG00000053371,AKR7A2,0.01742177,0.059218,3.01196
