# Examine overlaps with published data

In [1]:
import pandas as pd
import numpy as np

## Public si-eQTL analysis

In [2]:
# Gene symbols for the public si-eQTL studies named in the printed labels.
shen = [
    "GDAP2", "AIM2", "SLAMF6", "RLF", "ATG4C", "FUT7",
    "TMEM218", "C11orf74", "RAB35", "TMEM5", "HNRNPK",
    "CDCA3", "ERCC5", "GJB6", "SNTB2", "SPNS3",
    "XAF1", "RBBP8", "RUFY4", "CA2", "RAPGEF1",
]
kukurba = ["NOD2", "WDR36", "BSCL2", "MAP7D3", "RHOXF1", "DNAH1"]
# "HLA-DRB5" appears twice in this list, so unique symbols are counted below.
yao = [
    "NOD2", "HLA-DRB5", "HLA-DRB5", "KIAA0586", "PPP2R5A",
    "TSNAXIP1", "MUT", "GRIK2", "C15orf37", "LIMA1", "IL6ST",
    "HCG8", "BLOC1S3", "NKX3-1", "CXorf23",
]

print("Shen et al.:")
print(len(shen))

print("Kukurba et al.:")
print(len(kukurba))

print("Yao et al.:")
print(len(set(yao)))
print("Total of Yao + Kukurba:")
# Union removes symbols shared by the two lists (e.g. "NOD2").
len(set(yao).union(kukurba))

Shen et al.:
21
Kukurba et al.:
6
Yao et al.:
14
Total of Yao + Kukurba:


19

## Load BrainSeq si-eQTL results

### Interacting variant-gene pairs

In [3]:
# Load the BrainSeq interacting variant-gene table.
bs0 = pd.read_csv(
    "../../summary_table/_m/BrainSeq_sexGenotypes_4features_3regions.txt.gz",
    sep="\t",
)
# Drop everything from the first "." onward so IDs join to the biomart table.
bs0["ensembl_gene_id"] = bs0["gene_id"].str.replace("\\..*", "", regex=True)
biomart = pd.read_csv(
    "../../../residualized_expression/final_plot_eqtls/_h/biomart.csv",
    index_col=0,
)
# Inner join on the stripped ID, then keep the first row per original gene_id.
bs = pd.merge(bs0, biomart, on="ensembl_gene_id").drop_duplicates(subset="gene_id")
print(bs.shape)
bs.tail(2)

(974, 11)


Unnamed: 0,Tissue,gene_id,gencodeID,variant_id,seqnames,lfsr,Type,ensembl_gene_id,external_gene_name,entrezgene,description
15826,Hippocampus,ENSG00000185386.14,ENSG00000185386.14,chr22:50563003:G:C,chr22,0.049299,Gene,ENSG00000185386,MAPK11,5600.0,mitogen-activated protein kinase 11 [Source:HG...
15827,Hippocampus,ENSG00000267251.2,ENSG00000267251.2,chr18:80244356:T:C,chr18,0.049375,Gene,ENSG00000267251,AC139100.1,,


In [4]:
# Rows of bs whose gene symbol is in the Shen et al. list.
bs.loc[bs["external_gene_name"].isin(shen)]

Unnamed: 0,Tissue,gene_id,gencodeID,variant_id,seqnames,lfsr,Type,ensembl_gene_id,external_gene_name,entrezgene,description
4670,Caudate,ENSG00000125703.14,ENSG00000125703.14,chr1:63060301:G:A,chr1,0.049896,Gene,ENSG00000125703,ATG4C,84938.0,autophagy related 4C cysteine peptidase [Sourc...


In [5]:
# Rows of bs whose gene symbol is in the Kukurba et al. list.
bs.loc[bs["external_gene_name"].isin(kukurba)]

Unnamed: 0,Tissue,gene_id,gencodeID,variant_id,seqnames,lfsr,Type,ensembl_gene_id,external_gene_name,entrezgene,description


In [6]:
# Rows of bs whose gene symbol is in the Yao et al. list.
bs.loc[bs["external_gene_name"].isin(yao)]

Unnamed: 0,Tissue,gene_id,gencodeID,variant_id,seqnames,lfsr,Type,ensembl_gene_id,external_gene_name,entrezgene,description


In [7]:
# Rows of bs whose gene symbol is in any of the three published lists.
bs.loc[bs["external_gene_name"].isin(shen + kukurba + yao)]

Unnamed: 0,Tissue,gene_id,gencodeID,variant_id,seqnames,lfsr,Type,ensembl_gene_id,external_gene_name,entrezgene,description
4670,Caudate,ENSG00000125703.14,ENSG00000125703.14,chr1:63060301:G:A,chr1,0.049896,Gene,ENSG00000125703,ATG4C,84938.0,autophagy related 4C cysteine peptidase [Sourc...


### eigenMT corrected p-values

In [8]:
# Load the eFeatures table that carries the BF and eigenMT_BH columns.
# NOTE(review): this path climbs four directories, while the BrainSeq table
# loaded earlier in this notebook climbs two -- confirm both resolve.
df0 = pd.read_csv(
    "../../../../summary_table/_m/Brainseq_sex_interacting_4features_3regions.eFeatures.txt.gz",
    sep="\t",
)
# Keep Type == "Gene" rows, one row per gene_id, without the Tissue column.
df = (
    df0.loc[df0["Type"] == "Gene"]
    .drop_duplicates(subset="gene_id")
    .drop(columns="Tissue")
)
df["ensembl_gene_id"] = df["gene_id"].str.replace("\\..*", "", regex=True)
# NOTE(review): unlike bs, dft is not de-duplicated after the biomart merge --
# confirm biomart has at most one row per ensembl_gene_id.
dft = pd.merge(df, biomart, on="ensembl_gene_id")
print(dft.shape)
dft.head(2)

(5177, 14)


Unnamed: 0,variant_id,gene_id,gencodeID,slope,statistic,pval_nominal,BF,eigenMT_BH,TESTS,Type,ensembl_gene_id,external_gene_name,entrezgene,description
0,chr7:42936690:A:C,ENSG00000002746.14,ENSG00000002746.14,0.436502,7.820971,2.3e-05,0.010399,0.445692,445,Gene,ENSG00000002746,HECW1,23072.0,"HECT, C2 and WW domain containing E3 ubiquitin..."
1,chr17:48075934:C:T,ENSG00000002919.14,ENSG00000002919.14,-0.536903,-7.959488,0.000127,0.037424,0.549367,294,Gene,ENSG00000002919,SNX11,29916.0,sorting nexin 11 [Source:HGNC Symbol;Acc:HGNC:...


In [9]:
## Number of gene IDs present in both bs (mashr) and dft (eigenMT);
## not every gene significant via mashr is also significant with eigenMT.
len(set(bs["gene_id"]).intersection(dft["gene_id"]))

568

In [10]:
# Rows of dft whose gene symbol is in the Shen et al. list (no significance filter).
dft.loc[dft["external_gene_name"].isin(shen)]

Unnamed: 0,variant_id,gene_id,gencodeID,slope,statistic,pval_nominal,BF,eigenMT_BH,TESTS,Type,ensembl_gene_id,external_gene_name,entrezgene,description
486,chr1:63052156:A:G,ENSG00000125703.14,ENSG00000125703.14,0.210967,10.344946,7.92388e-05,0.037955,0.551098,479,Gene,ENSG00000125703,ATG4C,84938.0,autophagy related 4C cysteine peptidase [Sourc...
1029,chr16:68729086:G:C,ENSG00000168807.16,ENSG00000168807.16,0.378104,11.451744,0.000109064,0.033483,0.541553,307,Gene,ENSG00000168807,SNTB2,6645.0,syntrophin beta 2 [Source:HGNC Symbol;Acc:HGNC...
2399,chr13:20258404:TC:T,ENSG00000121742.16,ENSG00000121742.16,0.593176,10.551991,8.33577e-07,0.000542,0.317735,650,Gene,ENSG00000121742,GJB6,10804.0,gap junction protein beta 6 [Source:HGNC Symbo...
2804,chr11:36347842:A:G,ENSG00000166352.15,ENSG00000166352.15,0.518875,8.165337,1.67119e-05,0.009492,0.539284,568,Gene,ENSG00000166352,C11orf74,119710.0,chromosome 11 open reading frame 74 [Source:HG...
3861,chr8:84969351:T:C,ENSG00000104267.9,ENSG00000104267.9,-0.230131,-11.113888,0.000308507,0.040723,0.693507,132,Gene,ENSG00000104267,CA2,760.0,carbonic anhydrase 2 [Source:HGNC Symbol;Acc:H...
4091,chr17:6627584:T:C,ENSG00000132530.16,ENSG00000132530.16,-0.515449,-14.299219,5.13514e-05,0.044214,0.698,861,Gene,ENSG00000132530,XAF1,54739.0,XIAP associated factor 1 [Source:HGNC Symbol;A...


In [11]:
# Shen et al. genes that also have eigenMT_BH below 0.25.
dft.loc[dft["external_gene_name"].isin(shen) & dft["eigenMT_BH"].lt(0.25)]

Unnamed: 0,variant_id,gene_id,gencodeID,slope,statistic,pval_nominal,BF,eigenMT_BH,TESTS,Type,ensembl_gene_id,external_gene_name,entrezgene,description


In [12]:
# Kukurba et al. genes that also have eigenMT_BH below 0.25.
dft.loc[dft["external_gene_name"].isin(kukurba) & dft["eigenMT_BH"].lt(0.25)]

Unnamed: 0,variant_id,gene_id,gencodeID,slope,statistic,pval_nominal,BF,eigenMT_BH,TESTS,Type,ensembl_gene_id,external_gene_name,entrezgene,description


In [13]:
# Yao et al. genes that also have eigenMT_BH below 0.25.
dft.loc[dft["external_gene_name"].isin(yao) & dft["eigenMT_BH"].lt(0.25)]

Unnamed: 0,variant_id,gene_id,gencodeID,slope,statistic,pval_nominal,BF,eigenMT_BH,TESTS,Type,ensembl_gene_id,external_gene_name,entrezgene,description


## GTEx comparison

In [14]:
# Load the GTEx v8 sb-eQTL table (tab-separated) and preview the first ten columns.
# NOTE(review): absolute, user-specific path -- this cell only runs where it is mounted.
gtex = pd.read_csv(
    "/ceph/users/jbenja13/projects/sex_sz_ria/input/gtex_v8/"
    "_m/GTEx_Analysis_v8_sbeQTLs/GTEx_Analysis_v8_sbeQTLs.txt",
    sep="\t",
)
gtex.iloc[:2, :10]

Unnamed: 0,ensembl_gene_id,hugo_gene_id,gene_type,variant_id,rs_id,Tissue,maf,pval_nominal_sb,slope_sb,slope_se_sb
0,ENSG00000241860.6,RP11-34P13.13,processed_transcript,chr1_14677_G_A_b38,rs201327123,Adipose_Subcutaneous,0.051635,0.847114,0.05508,0.285537
1,ENSG00000227232.5,WASH7P,unprocessed_pseudogene,chr1_64764_C_T_b38,rs769952832,Adipose_Subcutaneous,0.061102,0.316881,0.222928,0.222511


In [15]:
# Preview columns 10-13 of the same first two rows.
gtex.iloc[:2, 10:14]

Unnamed: 0,numtested,pvals.corrected,qval,pval_nominal_f
0,1,0.847114,1.0,0.022302
1,1,0.316881,0.981254,0.003978


In [16]:
## qval threshold (0.25) equal to number of published sb-eQTL;
## first ten hits whose Tissue name contains "Brain".
gtex.loc[
    gtex["qval"].lt(0.25) & gtex["Tissue"].str.contains("Brain"),
    ["ensembl_gene_id", "hugo_gene_id", "Tissue", "pvals.corrected", "qval"],
].head(10)

Unnamed: 0,ensembl_gene_id,hugo_gene_id,Tissue,pvals.corrected,qval
62155,ENSG00000026025.15,VIM,Brain_Amygdala,4e-06,0.012836
116842,ENSG00000160818.16,GPATCH4,Brain_Nucleus_accumbens_basal_ganglia,8.8e-05,0.198445
121904,ENSG00000141562.17,NARF,Brain_Nucleus_accumbens_basal_ganglia,5.6e-05,0.198445
122123,ENSG00000267174.5,CTC-510F12.4,Brain_Nucleus_accumbens_basal_ganglia,8.3e-05,0.198445


In [17]:
## qval threshold equal to number of published sb-eQTL
# First ten hits with qval below 0.25 whose Tissue name contains "Whole".
gtex.loc[
    gtex["qval"].lt(0.25) & gtex["Tissue"].str.contains("Whole"),
    ["ensembl_gene_id", "hugo_gene_id", "Tissue", "pvals.corrected", "qval"],
].head(10)

Unnamed: 0,ensembl_gene_id,hugo_gene_id,Tissue,pvals.corrected,qval
362961,ENSG00000221571.3,RNU6ATAC35P,Whole_Blood,3.9e-05,0.139762
365043,ENSG00000196743.8,GM2A,Whole_Blood,1.1e-05,0.116825
367164,ENSG00000148459.15,PDSS1,Whole_Blood,2.7e-05,0.139762


In [18]:
# All GTEx rows with qval below 0.25, across every tissue.
gtex_sig = gtex.loc[gtex["qval"].lt(0.25)]
gtex_sig.shape

(369, 22)

In [19]:
# First ten rows of the qval-filtered GTEx table.
gtex_sig.iloc[:10]

Unnamed: 0,ensembl_gene_id,hugo_gene_id,gene_type,variant_id,rs_id,Tissue,maf,pval_nominal_sb,slope_sb,slope_se_sb,...,qval,pval_nominal_f,slope_f,slope_se_f,pval_nominal_m,slope_m,slope_se_m,pval_nominal,slope,slope_se
1096,ENSG00000076356.6,PLXNA2,protein_coding,chr1_208030492_G_A_b38,rs3811383,Adipose_Subcutaneous,0.123924,5.3916e-05,0.338278,0.083064,...,0.121068,1.71888e-08,0.456729,0.075705,0.91557,0.009739,0.091682,2.7474e-05,0.17183,0.040604
5262,ENSG00000170632.13,ARMC10,protein_coding,chr7_103076937_C_T_b38,rs6958836,Adipose_Subcutaneous,0.169535,5.01113e-05,0.357403,0.087384,...,0.1929,0.493324,-0.054539,0.079379,3.21922e-07,-0.4298,0.079545,8.79753e-08,-0.216374,0.039857
5644,ENSG00000120907.17,ADRA1A,protein_coding,chr8_26839198_G_A_b38,rs117380715,Adipose_Subcutaneous,0.216867,1.04589e-05,-0.323552,0.072676,...,0.084548,4.63741e-18,-0.779707,0.076596,3.97666e-10,-0.469672,0.069091,5.637370000000001e-52,-0.568916,0.033334
6414,ENSG00000136830.11,FAM129B,protein_coding,chr9_127584339_G_A_b38,rs10739693,Adipose_Subcutaneous,0.304647,7.38701e-07,-0.28366,0.056579,...,0.004976,1.978e-06,-0.333315,0.066772,0.165338,-0.082625,0.059205,1.39316e-08,-0.168762,0.02926
7220,ENSG00000166787.3,SAA3P,transcribed_unprocessed_pseudogene,chr11_18269355_T_C_b38,rs34068567,Adipose_Subcutaneous,0.27883,2.20729e-05,0.32303,0.075427,...,0.074347,6.4094e-08,0.453034,0.078725,0.3138,0.063002,0.062292,2.4336e-08,0.21191,0.037395
8540,ENSG00000183463.5,URAD,protein_coding,chr13_27990205_T_A_b38,rs7335293,Adipose_Subcutaneous,0.5,9.0787e-09,-0.444892,0.076123,...,0.000122,9.98265e-21,-0.887723,0.078738,1.89229e-09,-0.457733,0.070571,3.07731e-53,-0.640604,0.036976
9191,ENSG00000282651.2,IGHV5-10-1,IG_V_gene,chr14_106114510_A_G_b38,rs4573838,Adipose_Subcutaneous,0.419105,2.02515e-05,-0.40676,0.094541,...,0.074347,5.24871e-12,-0.682629,0.089412,0.00280506,-0.289091,0.094806,3.45842e-21,-0.445408,0.045073
14611,ENSG00000143933.16,CALM2,protein_coding,chr2_46225349_C_T_b38,rs12477148,Adipose_Visceral_Omentum,0.072495,4.49793e-05,-0.480557,0.116471,...,0.161955,0.000474715,-0.491287,0.134732,0.916574,0.013043,0.124116,2.19775e-05,-0.246023,0.057281
15082,ENSG00000144410.4,CPO,protein_coding,chr2_206822186_C_T_b38,rs12470278,Adipose_Visceral_Omentum,0.097015,3.20412e-05,0.682291,0.162191,...,0.11537,0.116543,0.280837,0.176978,4.43106e-06,-0.558002,0.113158,7.8964e-06,-0.320288,0.070745
17452,ENSG00000211698.2,TRGV4,TR_V_gene,chr7_38361995_A_C_b38,rs10233345,Adipose_Visceral_Omentum,0.335821,6.4381e-05,0.427491,0.105837,...,0.139089,7.01123e-06,-0.481758,0.100091,8.85767e-15,-1.06884,0.112111,1.6303899999999999e-49,-0.838766,0.04909


### mashr

In [20]:
# BrainSeq rows whose gene_id also appears among the qval-filtered GTEx genes.
# NOTE(review): both columns hold versioned IDs (ENSG....N), so a gene whose
# version differs between the two tables would not match -- confirm intended.
gtex_overlap = bs.loc[bs["gene_id"].isin(gtex_sig["ensembl_gene_id"])].drop_duplicates()
print(gtex_overlap.shape)
gtex_overlap

(14, 11)


Unnamed: 0,Tissue,gene_id,gencodeID,variant_id,seqnames,lfsr,Type,ensembl_gene_id,external_gene_name,entrezgene,description
2638,Caudate,ENSG00000105695.14,ENSG00000105695.14,chr19:35065453:C:T,chr19,0.04469,Gene,ENSG00000105695,MAG,4099.0,myelin associated glycoprotein [Source:HGNC Sy...
3163,Caudate,ENSG00000109775.10,ENSG00000109775.10,chr4:185462033:A:G,chr4,0.03458,Gene,ENSG00000109775,UFSP2,55325.0,UFM1 specific peptidase 2 [Source:HGNC Symbol;...
4518,Caudate,ENSG00000124406.16,ENSG00000124406.16,chr4:42387723:C:A,chr4,0.013706,Gene,ENSG00000124406,ATP8A1,10396.0,ATPase phospholipid transporting 8A1 [Source:H...
5118,Caudate,ENSG00000132199.18,ENSG00000132199.18,chr18:381997:C:G,chr18,0.044197,Gene,ENSG00000132199,ENOSF1,55556.0,enolase superfamily member 1 [Source:HGNC Symb...
5635,Caudate,ENSG00000136830.11,ENSG00000136830.11,chr9:127456523:G:A,chr9,0.044628,Gene,ENSG00000136830,FAM129B,64855.0,family with sequence similarity 129 member B [...
6605,Caudate,ENSG00000143933.16,ENSG00000143933.16,chr2:47607174:T:C,chr2,0.022399,Gene,ENSG00000143933,CALM2,805.0,calmodulin 2 [Source:HGNC Symbol;Acc:HGNC:1445]
6687,Caudate,ENSG00000146530.11,ENSG00000146530.11,chr7:12170840:G:A,chr7,0.023059,Gene,ENSG00000146530,VWDE,221806.0,von Willebrand factor D and EGF domains [Sourc...
10405,Caudate,ENSG00000181264.8,ENSG00000181264.8,chr11:120171373:G:A,chr11,0.002548,Gene,ENSG00000181264,TMEM136,219902.0,transmembrane protein 136 [Source:HGNC Symbol;...
11623,Caudate,ENSG00000189067.12,ENSG00000189067.12,chr16:11300271:A:G,chr16,0.007562,Gene,ENSG00000189067,LITAF,9516.0,lipopolysaccharide induced TNF factor [Source:...
12455,Caudate,ENSG00000204248.10,ENSG00000204248.10,chr6:33170900:G:A,chr6,0.045996,Gene,ENSG00000204248,COL11A2,1302.0,collagen type XI alpha 2 chain [Source:HGNC Sy...


In [21]:
# Overlapping rows as a percentage of all rows in bs.
len(gtex_overlap) / len(bs) * 100

1.4373716632443532

In [22]:
# The reverse view: qval-filtered GTEx rows whose gene ID is present in bs.
gtex_sig.loc[gtex_sig["ensembl_gene_id"].isin(bs["gene_id"])]

Unnamed: 0,ensembl_gene_id,hugo_gene_id,gene_type,variant_id,rs_id,Tissue,maf,pval_nominal_sb,slope_sb,slope_se_sb,...,qval,pval_nominal_f,slope_f,slope_se_f,pval_nominal_m,slope_m,slope_se_m,pval_nominal,slope,slope_se
6414,ENSG00000136830.11,FAM129B,protein_coding,chr9_127584339_G_A_b38,rs10739693,Adipose_Subcutaneous,0.304647,7.38701e-07,-0.28366,0.056579,...,0.004976,1.978e-06,-0.333315,0.066772,0.165338,-0.082625,0.059205,1.39316e-08,-0.168762,0.02926
14611,ENSG00000143933.16,CALM2,protein_coding,chr2_46225349_C_T_b38,rs12477148,Adipose_Visceral_Omentum,0.072495,4.49793e-05,-0.480557,0.116471,...,0.161955,0.000474715,-0.491287,0.134732,0.916574,0.013043,0.124116,2.19775e-05,-0.246023,0.057281
45230,ENSG00000181264.8,TMEM136,protein_coding,chr11_120493228_T_C_b38,rs4938809,Artery_Coronary,0.403756,1.83269e-05,0.3329,0.075523,...,0.098269,6.15984e-08,0.485968,0.074741,0.19901,0.113737,0.087221,8.49135e-08,0.231096,0.041304
135656,ENSG00000235098.8,ANKRD65,protein_coding,chr1_1419214_A_G_b38,rs3766165,Breast_Mammary_Tissue,0.131313,7.06337e-08,0.499559,0.090555,...,0.000111,7.56894e-06,-0.439928,0.091937,1.44354e-11,-0.879777,0.11199,3.28196e-30,-0.726153,0.057266
137724,ENSG00000124406.16,ATP8A1,protein_coding,chr4_42653682_A_T_b38,rs17448575,Breast_Mammary_Tissue,0.35443,0.000257775,0.24624,0.066638,...,0.090512,2.98919e-07,-0.323359,0.057862,1.09564e-13,-0.613169,0.068668,1.46214e-26,-0.370315,0.031672
137946,ENSG00000109775.10,UFSP2,protein_coding,chr4_185425919_G_C_b38,rs11132303,Breast_Mammary_Tissue,0.309343,4.67454e-07,-0.348749,0.067806,...,0.000399,9.9429e-10,0.432381,0.06255,7.75008e-18,0.75861,0.068668,2.0149099999999998e-57,0.706155,0.035862
138621,ENSG00000204248.10,COL11A2,protein_coding,chr6_33081200_G_GA_b38,rs113353922,Breast_Mammary_Tissue,0.118687,0.000619905,0.418594,0.121102,...,0.102197,0.230403,-0.125417,0.103786,0.000272098,-0.553034,0.145261,4.48205e-06,-0.293079,0.062803
138995,ENSG00000146530.11,VWDE,protein_coding,chr7_12312811_T_C_b38,rs4721084,Breast_Mammary_Tissue,0.45202,1.74134e-06,-0.301523,0.061908,...,0.000744,5.40871e-12,-0.446355,0.055294,0.00150158,-0.237816,0.072364,2.76501e-20,-0.319694,0.032375
142471,ENSG00000189067.12,LITAF,protein_coding,chr16_11551157_A_T_b38,rs11644920,Breast_Mammary_Tissue,0.339646,0.000119688,0.247997,0.063687,...,0.035705,0.000330719,-0.171901,0.045841,2.92507e-07,-0.385154,0.068855,9.49552e-22,-0.323697,0.031398
143690,ENSG00000267056.2,AC005336.4,processed_pseudogene,chr19_15934939_G_A_b38,rs12985091,Breast_Mammary_Tissue,0.478535,7.22863e-05,-0.373013,0.092781,...,0.015797,1.96198e-07,-0.587505,0.103261,0.00193534,-0.245596,0.076638,1.17601e-12,-0.331468,0.044801


In [23]:
# Write the qval-filtered GTEx rows whose gene ID is in bs to CSV, without the index.
gtex_sig.loc[gtex_sig["ensembl_gene_id"].isin(bs["gene_id"])].to_csv(
    "siEQTL_gtex_comparison.csv", index=False
)

### eigenMT corrected

In [24]:
# dft rows with BF below 0.05 whose gene_id is among the qval-filtered GTEx genes.
gtex_overlap = dft.loc[
    dft["gene_id"].isin(gtex_sig["ensembl_gene_id"]) & dft["BF"].lt(0.05)
].drop_duplicates()
print(gtex_overlap.shape)
gtex_overlap.sort_values("BF").head()

(57, 14)


Unnamed: 0,variant_id,gene_id,gencodeID,slope,statistic,pval_nominal,BF,eigenMT_BH,TESTS,Type,ensembl_gene_id,external_gene_name,entrezgene,description
473,chr4:42392391:T:G,ENSG00000124406.16,ENSG00000124406.16,-0.291917,-13.197929,4e-06,0.002091,0.362373,502,Gene,ENSG00000124406,ATP8A1,10396.0,ATPase phospholipid transporting 8A1 [Source:H...
4271,chr8:11603602:A:G,ENSG00000154328.15,ENSG00000154328.15,0.380078,14.18335,2e-06,0.00213,0.458802,888,Gene,ENSG00000154328,NEIL2,252969.0,nei like DNA glycosylase 2 [Source:HGNC Symbol...
406,chr1:89159687:G:A,ENSG00000117226.11,ENSG00000117226.11,0.431197,15.848961,9e-06,0.002374,0.362373,266,Gene,ENSG00000117226,GBP3,2635.0,guanylate binding protein 3 [Source:HGNC Symbo...
2556,chr8:63463308:T:C,ENSG00000137563.11,ENSG00000137563.11,0.527946,10.192544,7e-06,0.00246,0.438136,335,Gene,ENSG00000137563,GGH,8836.0,gamma-glutamyl hydrolase [Source:HGNC Symbol;A...
3627,chr22:25059120:A:C,ENSG00000272977.1,ENSG00000272977.1,0.626299,16.616479,3e-06,0.002482,0.438136,744,Gene,ENSG00000272977,AL008721.2,,


In [25]:
# Same overlap without any BF cutoff; this reassigns gtex_overlap.
gtex_overlap = dft.loc[dft["gene_id"].isin(gtex_sig["ensembl_gene_id"])].drop_duplicates()
print(gtex_overlap.shape)
gtex_overlap.sort_values("BF").head()

(57, 14)


Unnamed: 0,variant_id,gene_id,gencodeID,slope,statistic,pval_nominal,BF,eigenMT_BH,TESTS,Type,ensembl_gene_id,external_gene_name,entrezgene,description
473,chr4:42392391:T:G,ENSG00000124406.16,ENSG00000124406.16,-0.291917,-13.197929,4e-06,0.002091,0.362373,502,Gene,ENSG00000124406,ATP8A1,10396.0,ATPase phospholipid transporting 8A1 [Source:H...
4271,chr8:11603602:A:G,ENSG00000154328.15,ENSG00000154328.15,0.380078,14.18335,2e-06,0.00213,0.458802,888,Gene,ENSG00000154328,NEIL2,252969.0,nei like DNA glycosylase 2 [Source:HGNC Symbol...
406,chr1:89159687:G:A,ENSG00000117226.11,ENSG00000117226.11,0.431197,15.848961,9e-06,0.002374,0.362373,266,Gene,ENSG00000117226,GBP3,2635.0,guanylate binding protein 3 [Source:HGNC Symbo...
2556,chr8:63463308:T:C,ENSG00000137563.11,ENSG00000137563.11,0.527946,10.192544,7e-06,0.00246,0.438136,335,Gene,ENSG00000137563,GGH,8836.0,gamma-glutamyl hydrolase [Source:HGNC Symbol;A...
3627,chr22:25059120:A:C,ENSG00000272977.1,ENSG00000272977.1,0.626299,16.616479,3e-06,0.002482,0.438136,744,Gene,ENSG00000272977,AL008721.2,,


In [26]:
# Overlapping rows as a percentage of all rows in dft.
len(gtex_overlap) / len(dft) * 100

1.1010237589337455