# Examine genes that are discordant across brain regions

In [1]:
import functools
import numpy as np
import pandas as pd

In [6]:
# Maps each brain-region key to the tab-separated differential-expression
# table that get_deg() reads for that region.
# NOTE(review): the caudate entry is relative to the notebook's working
# directory, while the dlpfc/hippo entries are absolute paths on a specific
# filesystem -- the notebook will not run elsewhere without editing these.
config = {
    'caudate': '../../../../_m/genes/diffExpr_szVctl_full.txt',
    'dlpfc': '/ceph/projects/v4_phase3_paper/inputs/public_data/_m/phase2/dlpfc_diffExpr_szVctl_full.txt',
    'hippo': '/ceph/projects/v4_phase3_paper/inputs/public_data/_m/phase2/hippo_diffExpr_szVctl_full.txt',
}

In [3]:
@functools.lru_cache()
def get_deg(tissue):
    """
    Load the differential-expression table for one brain region.

    The file path is looked up in the module-level ``config`` dictionary and
    read as a tab-separated table whose first column becomes the index.

    Parameters
    ----------
    tissue : str
        Key into ``config`` (for example ``'caudate'``).

    Returns
    -------
    pandas.DataFrame
        Columns ``Feature`` (copy of the index), ``ensemblID``, ``Symbol``,
        ``adj.P.Val``, ``logFC``, ``t`` and ``Dir`` (sign of ``t``).

    Raises
    ------
    KeyError
        If ``tissue`` is not in ``config`` or a required column is missing.

    Notes
    -----
    The result is memoised by ``functools.lru_cache``, so every call with the
    same ``tissue`` returns the *same* DataFrame object; do not mutate it.
    """
    dft = pd.read_csv(config[tissue], sep='\t', index_col=0)
    dft['Feature'] = dft.index
    dft['Dir'] = np.sign(dft['t'])
    if 'gene_id' in dft.columns:
        # Strip the version suffix ("ENSG00000188730.4" -> "ENSG00000188730").
        # regex=True must be explicit: since pandas 2.0 str.replace treats the
        # pattern as a literal string by default, which would leave IDs intact.
        dft['ensemblID'] = dft['gene_id'].str.replace(r'\..*', '', regex=True)
    elif 'ensembl_gene_id' in dft.columns:
        # Non-mutating rename keeps the freshly loaded frame's lineage explicit.
        dft = dft.rename(columns={'ensembl_gene_id': 'ensemblID'})
    elif 'ensemblID' not in dft.columns:
        raise KeyError(
            "No 'gene_id', 'ensembl_gene_id' or 'ensemblID' column found "
            "in the table for tissue %r" % (tissue,)
        )
    return dft[['Feature', 'ensemblID', 'Symbol', 'adj.P.Val', 'logFC', 't', 'Dir']]


@functools.lru_cache()
def get_deg_sig(tissue, fdr):
    """
    Return the rows of ``get_deg(tissue)`` whose adjusted p-value is below ``fdr``.

    The comparison is strict (``adj.P.Val < fdr``). Results are memoised per
    ``(tissue, fdr)`` pair by ``functools.lru_cache``.
    """
    all_features = get_deg(tissue)
    passes_threshold = all_features['adj.P.Val'] < fdr
    return all_features[passes_threshold]


@functools.lru_cache()
def merge_dataframes(tissue1, tissue2):
    """
    Inner-join the full differential-expression tables of two tissues on ``Feature``.

    Columns other than ``Feature`` that occur in both tables are disambiguated
    with ``_<tissue1>`` / ``_<tissue2>`` suffixes. Memoised per tissue pair.
    """
    left_table = get_deg(tissue1)
    right_table = get_deg(tissue2)
    tissue_suffixes = ['_%s' % tissue1, '_%s' % tissue2]
    return left_table.merge(right_table, on='Feature', suffixes=tissue_suffixes)


@functools.lru_cache()
def merge_dataframes_sig(tissue1, tissue2, fdr=0.05):
    """
    Inner-join the significant features of two tissues on ``Feature``.

    Parameters
    ----------
    tissue1, tissue2 : str
        Tissue keys passed through to ``get_deg_sig``.
    fdr : float, optional
        Adjusted p-value threshold applied to both tissues. Defaults to 0.05,
        which is the value the previous implementation used for every tissue
        (its per-tissue conditionals selected 0.05 on both branches).

    Returns
    -------
    pandas.DataFrame
        One row per feature significant in both tissues; overlapping column
        names carry ``_<tissue1>`` / ``_<tissue2>`` suffixes.
    """
    sig_first = get_deg_sig(tissue1, fdr)
    sig_second = get_deg_sig(tissue2, fdr)
    return sig_first.merge(sig_second, on='Feature',
                           suffixes=['_%s' % tissue1, '_%s' % tissue2])


In [4]:
def extract_disconcordant(tissue1, tissue2):
    """
    Return features significant in both tissues whose effect directions are opposite.

    A feature is kept when its ``Dir`` is +1 in one tissue and -1 in the other.
    The result holds ``Feature``, the two per-tissue direction columns, and the
    ``tissue1`` statistics from ``get_deg`` (without its ``Dir`` column).
    """
    dir_col1 = 'Dir_%s' % tissue1
    dir_col2 = 'Dir_%s' % tissue2
    shared_sig = merge_dataframes_sig(tissue1, tissue2)
    up_in_first = (shared_sig[dir_col1] == 1) & (shared_sig[dir_col2] == -1)
    up_in_second = (shared_sig[dir_col1] == -1) & (shared_sig[dir_col2] == 1)
    opposite = shared_sig[up_in_first | up_in_second]
    directions = opposite.loc[:, ['Feature', dir_col1, dir_col2]]
    annotated = directions.merge(get_deg(tissue1), on='Feature')
    return annotated.drop('Dir', axis=1)

## BrainSeq Tissue Comparison

In [7]:
# Features significant in both caudate and DLPFC with opposite effect
# directions; the annotation columns come from the caudate table.
cd = extract_disconcordant('caudate', 'dlpfc')  
# Save the table (row index omitted) to the current working directory.
cd.to_csv("disconcordant_genes_%s_%s.csv" % ('caudate', 'dlpfc'), index=False)
print(cd.shape)
# Last expression of the cell: rendered as the cell's table output.
cd

(17, 8)


Unnamed: 0,Feature,Dir_caudate,Dir_dlpfc,ensemblID,Symbol,adj.P.Val,logFC,t
0,ENSG00000188730.4,1.0,-1.0,ENSG00000188730,VWC2,1.459738e-13,0.372256,8.772073
1,ENSG00000132639.12,1.0,-1.0,ENSG00000132639,SNAP25,3.136443e-07,0.200865,6.221344
2,ENSG00000116679.15,1.0,-1.0,ENSG00000116679,IVNS1ABP,1.136882e-06,0.204298,5.955661
3,ENSG00000100285.9,1.0,-1.0,ENSG00000100285,NEFH,3.078136e-05,0.262173,5.187894
4,ENSG00000136040.8,-1.0,1.0,ENSG00000136040,PLXNC1,0.0001852441,-0.141434,-4.734092
5,ENSG00000204856.11,1.0,-1.0,ENSG00000204856,FAM216A,0.000674238,0.078138,4.368283
6,ENSG00000036530.8,-1.0,1.0,ENSG00000036530,CYP46A1,0.001859859,-0.088325,-4.051408
7,ENSG00000111262.4,1.0,-1.0,ENSG00000111262,KCNA1,0.002300858,0.126319,3.978675
8,ENSG00000151917.17,1.0,-1.0,ENSG00000151917,BEND6,0.01080767,0.075823,3.429727
9,ENSG00000143858.11,1.0,-1.0,ENSG00000143858,SYT2,0.01258174,0.133153,3.367376


In [8]:
# Features significant in both caudate and hippocampus with opposite effect
# directions; the annotation columns come from the caudate table.
ch = extract_disconcordant('caudate', 'hippo')  
# Save the table (row index omitted) to the current working directory.
ch.to_csv("disconcordant_genes_%s_%s.csv" % ('caudate', 'hippo'), index=False)
print(ch.shape)
# Last expression of the cell: rendered as the cell's table output.
ch

(2, 8)


Unnamed: 0,Feature,Dir_caudate,Dir_hippo,ensemblID,Symbol,adj.P.Val,logFC,t
0,ENSG00000128203.6,1.0,-1.0,ENSG00000128203,ASPHD2,8e-06,0.174235,5.517725
1,ENSG00000132872.11,1.0,-1.0,ENSG00000132872,SYT4,0.019922,0.11311,3.180046


In [9]:
# Genes whose direction in caudate is opposite to both DLPFC and hippocampus:
# the IDs common to the caudate-vs-DLPFC and caudate-vs-hippocampus tables.
print("There are %d genes where caudate is different from both DLPFC and hippocampus!\n" %
      len(set(ch['ensemblID']).intersection(cd['ensemblID'])))

cd.loc[cd['ensemblID'].isin(list(set(ch['ensemblID']).intersection(cd['ensemblID'])))]

There are 0 genes where caudate is different from both DLPFC and hippocampus!



Unnamed: 0,Feature,Dir_caudate,Dir_dlpfc,ensemblID,Symbol,adj.P.Val,logFC,t


In [10]:
# Features significant in both DLPFC and hippocampus with opposite effect
# directions; the annotation columns come from the DLPFC table.
dh = extract_disconcordant('dlpfc', 'hippo')  
# NOTE(review): the CSV export is disabled for this comparison, unlike the two
# caudate comparisons -- presumably because the result is empty; confirm.
#dh.to_csv("disconcordant_genes_%s_%s.csv" % ('dlpfc', 'hippo'), index=False)
print(dh.shape)
# Last expression of the cell: rendered as the cell's table output.
dh

(0, 8)


Unnamed: 0,Dir_dlpfc,Dir_hippo,Feature,ensemblID,Symbol,adj.P.Val,logFC,t


In [11]:
# Genes whose direction in hippocampus is opposite to both caudate and DLPFC:
# the IDs common to the DLPFC-vs-hippocampus and caudate-vs-hippocampus tables.
print("There are %d genes where hippocampus is different from both caudate and DLPFC!\n" %
      len(set(dh['ensemblID']).intersection(ch['ensemblID'])))

dh.loc[dh['ensemblID'].isin(list(set(dh['ensemblID']).intersection(ch['ensemblID'])))]

There are 0 genes where hippocampus is different from both caudate and DLPFC!



Unnamed: 0,Dir_dlpfc,Dir_hippo,Feature,ensemblID,Symbol,adj.P.Val,logFC,t


In [12]:
# Genes whose direction in DLPFC is opposite to both caudate and hippocampus:
# the IDs common to the DLPFC-vs-hippocampus and caudate-vs-DLPFC tables.
print("There are %d genes where DLPFC is different from both caudate and hippocampus!\n" %
      len(set(dh['ensemblID']).intersection(cd['ensemblID'])))

There are 0 genes where DLPFC is different from both caudate and hippocampus!

