# Examine genes that are discordant across brain regions

In [1]:
import functools
import numpy as np
import pandas as pd

In [2]:
# Map each short tissue key to its differential-expression results file.
# All files share one relative path layout; only the region directory differs.
config = {
    tissue: '../../../../%s/_m/genes/diffExpr_EAvsAA_full.txt' % folder
    for tissue, folder in [
        ('caudate', 'caudate'),
        ('dlpfc', 'dlpfc'),
        ('hippo', 'hippocampus'),
        ('gyrus', 'dentateGyrus'),
    ]
}

In [3]:
@functools.lru_cache()
def get_deg(tissue):
    """Load the differential-expression table for one tissue.

    Reads the tab-separated file registered under ``config[tissue]`` (first
    column used as the index) and returns a frame restricted to the columns
    ``Feature``, ``ensemblID``, ``Symbol``, ``adj.P.Val``, ``logFC``, ``t``
    and ``Dir``.

    ``Feature`` is a copy of the index and ``Dir`` is the sign of ``t``.
    ``ensemblID`` is ``gene_id`` with everything from the first dot onward
    removed when a ``gene_id`` column exists; otherwise an
    ``ensembl_gene_id`` column, if present, is renamed to ``ensemblID``.

    The result is memoized per ``tissue`` by ``functools.lru_cache``, so
    repeated calls hand back the same DataFrame object.
    """
    table = pd.read_csv(config[tissue], sep='\t', index_col=0)
    table = table.assign(Feature=table.index, Dir=np.sign(table['t']))
    available = table.columns
    if 'gene_id' in available:
        # Strip the version suffix (".N") from the gene identifier.
        table['ensemblID'] = table.gene_id.str.replace('\\..*', '', regex=True)
    elif 'ensembl_gene_id' in available:
        table = table.rename(columns={'ensembl_gene_id': 'ensemblID'})
    wanted = ['Feature', 'ensemblID', 'Symbol', 'adj.P.Val', 'logFC', 't', 'Dir']
    return table[wanted]


@functools.lru_cache()
def get_deg_sig(tissue, fdr):
    """Return the rows of ``get_deg(tissue)`` whose ``adj.P.Val`` is below ``fdr``.

    The comparison is strict (``<``). Results are memoized per
    ``(tissue, fdr)`` pair by ``functools.lru_cache``.
    """
    results = get_deg(tissue)
    passes_fdr = results['adj.P.Val'] < fdr
    return results[passes_fdr]


@functools.lru_cache()
def merge_dataframes(tissue1, tissue2):
    return get_deg(tissue1).merge(get_deg(tissue2), on='Feature', 
                                  suffixes=['_%s' % tissue1, '_%s' % tissue2])


@functools.lru_cache()
def merge_dataframes_sig(tissue1, tissue2, fdr=0.05):
    """Join the FDR-significant results of two tissues on ``Feature``.

    Each tissue is first filtered with ``get_deg_sig(tissue, fdr)``; the two
    filtered tables are then merged on ``Feature`` with pandas' default join,
    so only features significant in both tissues remain. Overlapping columns
    receive the suffixes ``_<tissue1>`` and ``_<tissue2>``.

    Parameters
    ----------
    tissue1, tissue2 : str
        Tissue keys understood by ``get_deg_sig``.
    fdr : float, optional
        Adjusted p-value cutoff applied to both tissues. Defaults to 0.05,
        which is the threshold the previous implementation used for every
        tissue (its per-tissue conditional had identical branches).

    Returns
    -------
    pandas.DataFrame
        The merged table. The result is memoized by ``functools.lru_cache``,
        so repeated calls return the same object; avoid modifying it in place.
    """
    labels = ['_%s' % tissue1, '_%s' % tissue2]
    left = get_deg_sig(tissue1, fdr)
    right = get_deg_sig(tissue2, fdr)
    return left.merge(right, on='Feature', suffixes=labels)


In [4]:
def extract_disconcordant(tissue1, tissue2):
    """Return features significant in both tissues but with opposite direction.

    Starts from ``merge_dataframes_sig(tissue1, tissue2)`` and keeps rows
    where one tissue's ``Dir`` is ``1`` and the other's is ``-1``. The kept
    ``Feature`` and the two direction columns are then joined back to
    ``get_deg(tissue1)``, so the remaining statistics (``ensemblID``,
    ``Symbol``, ``adj.P.Val``, ``logFC``, ``t``) are those of ``tissue1``;
    the unsuffixed ``Dir`` column is dropped.
    """
    dir1, dir2 = 'Dir_%s' % tissue1, 'Dir_%s' % tissue2
    shared = merge_dataframes_sig(tissue1, tissue2)
    up_then_down = (shared[dir1] == 1) & (shared[dir2] == -1)
    down_then_up = (shared[dir1] == -1) & (shared[dir2] == 1)
    flipped = shared[up_then_down | down_then_up]
    annotation = get_deg(tissue1)
    return flipped[['Feature', dir1, dir2]].merge(annotation, on='Feature').drop('Dir', axis=1)

## BrainSeq Tissue Comparison

In [5]:
# Caudate vs. DLPFC: opposite-direction genes; statistics shown are the caudate's.
regions = ('caudate', 'dlpfc')
cd = extract_disconcordant(*regions)
cd.to_csv("disconcordant_genes_%s_%s.csv" % regions, index=False)
print(cd.shape)
cd

(8, 8)


Unnamed: 0,Feature,Dir_caudate,Dir_dlpfc,ensemblID,Symbol,adj.P.Val,logFC,t
0,ENSG00000277883.1,1.0,-1.0,ENSG00000277883,NLRP3P1,6.56221e-07,0.404942,6.067922
1,ENSG00000070601.9,1.0,-1.0,ENSG00000070601,FRMPD1,0.01220536,0.205316,3.352644
2,ENSG00000006625.17,-1.0,1.0,ENSG00000006625,GGCT,0.01784174,-0.106137,-3.200556
3,ENSG00000100302.6,1.0,-1.0,ENSG00000100302,RASD2,0.0201982,0.179923,3.146998
4,ENSG00000198034.10,-1.0,1.0,ENSG00000198034,RPS4X,0.02940114,-0.060516,-2.981437
5,ENSG00000027075.13,-1.0,1.0,ENSG00000027075,PRKCH,0.03445104,-0.130863,-2.912396
6,ENSG00000161544.9,1.0,-1.0,ENSG00000161544,CYGB,0.04099586,0.176233,2.835293
7,ENSG00000133574.9,-1.0,1.0,ENSG00000133574,GIMAP4,0.04672012,-0.156888,-2.774555


In [6]:
# Caudate vs. hippocampus: opposite-direction genes; statistics shown are the caudate's.
regions = ('caudate', 'hippo')
ch = extract_disconcordant(*regions)
ch.to_csv("disconcordant_genes_%s_%s.csv" % regions, index=False)
print(ch.shape)
ch

(7, 8)


Unnamed: 0,Feature,Dir_caudate,Dir_hippo,ensemblID,Symbol,adj.P.Val,logFC,t
0,ENSG00000130558.19,-1.0,1.0,ENSG00000130558,OLFM1,1.9e-05,-0.228691,-5.292123
1,ENSG00000106952.7,-1.0,1.0,ENSG00000106952,TNFSF8,4.3e-05,-0.41272,-5.081967
2,ENSG00000196455.7,1.0,-1.0,ENSG00000196455,PIK3R4,0.000243,0.111391,4.617778
3,ENSG00000138207.13,1.0,-1.0,ENSG00000138207,RBP4,0.001274,0.220448,4.142458
4,ENSG00000104044.15,-1.0,1.0,ENSG00000104044,OCA2,0.006757,-0.289518,-3.585903
5,ENSG00000139719.9,1.0,-1.0,ENSG00000139719,VPS33A,0.019055,0.068472,3.169776
6,ENSG00000123329.17,-1.0,1.0,ENSG00000123329,ARHGAP9,0.040745,-0.166918,-2.839868


In [7]:
# Genes flagged in both caudate comparisons (vs. DLPFC and vs. hippocampus).
caudate_specific_ids = set(ch['ensemblID']).intersection(cd['ensemblID'])
print("There are %d genes where caudate is different from both DLPFC and hippocampus!\n"
      % len(caudate_specific_ids))

cd[cd['ensemblID'].isin(list(caudate_specific_ids))]

There are 0 genes where caudate is different from both DLPFC and hippocampus!



Unnamed: 0,Feature,Dir_caudate,Dir_dlpfc,ensemblID,Symbol,adj.P.Val,logFC,t


In [8]:
# DLPFC vs. hippocampus: opposite-direction genes; statistics shown are the DLPFC's.
regions = ('dlpfc', 'hippo')
dh = extract_disconcordant(*regions)
dh.to_csv("disconcordant_genes_%s_%s.csv" % regions, index=False)
print(dh.shape)
dh

(3, 8)


Unnamed: 0,Feature,Dir_dlpfc,Dir_hippo,ensemblID,Symbol,adj.P.Val,logFC,t
0,ENSG00000078902.15,-1.0,1.0,ENSG00000078902,TOLLIP,0.019953,-0.082392,-3.188057
1,ENSG00000114670.13,-1.0,1.0,ENSG00000114670,NEK11,0.025698,-0.078108,-3.078731
2,ENSG00000138207.13,1.0,-1.0,ENSG00000138207,RBP4,0.040661,0.132448,2.866446


In [9]:
# Genes flagged in both hippocampus comparisons (vs. DLPFC and vs. caudate).
hippo_specific_ids = set(dh['ensemblID']).intersection(ch['ensemblID'])
print("There are %d genes where hippocampus is different from both caudate and DLPFC!\n"
      % len(hippo_specific_ids))

dh[dh['ensemblID'].isin(list(hippo_specific_ids))]

There are 1 genes where hippocampus is different from both caudate and DLPFC!



Unnamed: 0,Feature,Dir_dlpfc,Dir_hippo,ensemblID,Symbol,adj.P.Val,logFC,t
2,ENSG00000138207.13,1.0,-1.0,ENSG00000138207,RBP4,0.040661,0.132448,2.866446


In [10]:
# Genes flagged in both DLPFC comparisons (vs. hippocampus and vs. caudate).
dlpfc_specific_ids = set(dh['ensemblID']).intersection(cd['ensemblID'])
print("There are %d genes where DLPFC is different from both caudate and hippocampus!\n"
      % len(dlpfc_specific_ids))

There are 0 genes where DLPFC is different from both caudate and hippocampus!



In [11]:
# Caudate vs. dentate gyrus: opposite-direction genes; statistics shown are the caudate's.
regions = ('caudate', 'gyrus')
cg = extract_disconcordant(*regions)
cg.to_csv("disconcordant_genes_%s_%s.csv" % regions, index=False)
print(cg.shape)
cg

(13, 8)


Unnamed: 0,Feature,Dir_caudate,Dir_gyrus,ensemblID,Symbol,adj.P.Val,logFC,t
0,ENSG00000277883.1,1.0,-1.0,ENSG00000277883,NLRP3P1,6.56221e-07,0.404942,6.067922
1,ENSG00000075234.16,-1.0,1.0,ENSG00000075234,TTC38,7.654728e-06,-0.301819,-5.508393
2,ENSG00000134265.12,1.0,-1.0,ENSG00000134265,NAPG,0.001247357,0.080082,4.149428
3,ENSG00000101224.17,-1.0,1.0,ENSG00000101224,CDC25B,0.001598725,-0.118644,-4.069179
4,ENSG00000068831.18,1.0,-1.0,ENSG00000068831,RASGRP2,0.002788749,0.139554,3.89175
5,ENSG00000256537.4,1.0,-1.0,ENSG00000256537,SMIM10L1,0.002984501,0.111219,3.870464
6,ENSG00000125845.6,-1.0,1.0,ENSG00000125845,BMP2,0.01131502,-0.209347,-3.383479
7,ENSG00000105700.10,-1.0,1.0,ENSG00000105700,KXD1,0.01440904,-0.071441,-3.288598
8,ENSG00000189376.11,1.0,-1.0,ENSG00000189376,C8orf76,0.01626238,0.145713,3.239705
9,ENSG00000228624.7,-1.0,1.0,ENSG00000228624,,0.01774571,-0.110123,-3.202949


In [12]:
# DLPFC vs. dentate gyrus: opposite-direction genes; statistics shown are the DLPFC's.
regions = ('dlpfc', 'gyrus')
dg = extract_disconcordant(*regions)
dg.to_csv("disconcordant_genes_%s_%s.csv" % regions, index=False)
print(dg.shape)
dg

(3, 8)


Unnamed: 0,Feature,Dir_dlpfc,Dir_gyrus,ensemblID,Symbol,adj.P.Val,logFC,t
0,ENSG00000147251.15,1.0,-1.0,ENSG00000147251,DOCK11,0.000729,0.138798,4.368086
1,ENSG00000188386.6,-1.0,1.0,ENSG00000188386,PPP3R2,0.013399,-0.240428,-3.34398
2,ENSG00000280294.1,-1.0,1.0,ENSG00000280294,,0.022723,-0.118239,-3.13276


In [13]:
# Genes flagged in both the caudate-gyrus and DLPFC-gyrus comparisons.
gyrus_vs_caudate_dlpfc_ids = set(cg['ensemblID']).intersection(dg['ensemblID'])
print("There are %d genes where dentate gyrus is different from both caudate and DLPFC!\n"
      % len(gyrus_vs_caudate_dlpfc_ids))

cg[cg['ensemblID'].isin(list(gyrus_vs_caudate_dlpfc_ids))]

There are 0 genes where dentate gyrus is different from both caudate and DLPFC!



Unnamed: 0,Feature,Dir_caudate,Dir_gyrus,ensemblID,Symbol,adj.P.Val,logFC,t


In [14]:
# Hippocampus vs. dentate gyrus: opposite-direction genes; statistics shown are the hippocampus's.
regions = ('hippo', 'gyrus')
hg = extract_disconcordant(*regions)
hg.to_csv("disconcordant_genes_%s_%s.csv" % regions, index=False)
print(hg.shape)
hg

(4, 8)


Unnamed: 0,Feature,Dir_hippo,Dir_gyrus,ensemblID,Symbol,adj.P.Val,logFC,t
0,ENSG00000101224.17,-1.0,1.0,ENSG00000101224,CDC25B,0.000833,-0.137511,-4.293268
1,ENSG00000075234.16,-1.0,1.0,ENSG00000075234,TTC38,0.012194,-0.165073,-3.367048
2,ENSG00000165410.14,-1.0,1.0,ENSG00000165410,CFL2,0.031319,-0.111948,-2.967453
3,ENSG00000214140.10,-1.0,1.0,ENSG00000214140,PRCD,0.04347,-0.165288,-2.808149


In [15]:
# Genes flagged in both the hippocampus-gyrus and DLPFC-gyrus comparisons.
gyrus_vs_dlpfc_hippo_ids = set(hg['ensemblID']).intersection(dg['ensemblID'])
print("There are %d genes where dentate gyrus is different from both DLPFC and hippocampus!\n"
      % len(gyrus_vs_dlpfc_hippo_ids))

hg[hg['ensemblID'].isin(list(gyrus_vs_dlpfc_hippo_ids))]

There are 0 genes where dentate gyrus is different from both DLPFC and hippocampus!



Unnamed: 0,Feature,Dir_hippo,Dir_gyrus,ensemblID,Symbol,adj.P.Val,logFC,t


In [16]:
# Genes flagged in all three dentate gyrus comparisons (caudate, DLPFC, hippocampus).
gyrus_vs_all_ids = set(cg['ensemblID']).intersection(dg['ensemblID'], hg['ensemblID'])
print("There are %d genes where dentate gyrus is different from both caudate, DLPFC, and hippocampus!\n"
      % len(gyrus_vs_all_ids))

cg[cg['ensemblID'].isin(list(gyrus_vs_all_ids))]

There are 0 genes where dentate gyrus is different from both caudate, DLPFC, and hippocampus!



Unnamed: 0,Feature,Dir_caudate,Dir_gyrus,ensemblID,Symbol,adj.P.Val,logFC,t
