# Number of tissues affected per eQTL

## Housekeeping

### Imports

In [1]:
import os
import re

import mysql.connector
import pandas as pd
from sqlalchemy import create_engine

In [2]:
# SQLAlchemy engine for the `gtex` MySQL database.
# The connection URL can be supplied through the GTEX_DB_URL environment variable so that
# real credentials do not have to be committed with the notebook; when the variable is
# unset, the original local-development URL is used, so existing setups keep working.
engine = create_engine(
    os.environ.get(
        'GTEX_DB_URL',
        'mysql+mysqlconnector://jupyter:password@localhost:3306/gtex',
    ),
    echo=False,
)

### Functions

Remove version numbers from Ensembl Gene IDs, e.g. 'ENSG000000001.8' becomes 'ENSG000000001'. This enables comparison between IDs from different sources.

In [3]:
def removeGeneIDVersions(text):
    """Return the first Ensembl gene ID in ``text`` without its version suffix.

    The ID is the first occurrence of 'ENSG' followed by one or more digits;
    anything after those digits (e.g. '.8') is dropped.

    Args:
        text: String containing an Ensembl gene ID, e.g. 'ENSG00000138593.4'.

    Returns:
        The matched ID, e.g. 'ENSG00000138593'.

    Raises:
        IndexError: If ``text`` contains no 'ENSG<digits>' token. This is the
            same exception type the previous ``findall(...)[0]`` lookup raised,
            now with a descriptive message.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+).
    match = re.search(r'ENSG\d+', text)
    if match is None:
        raise IndexError('no Ensembl gene ID (ENSG<digits>) found in {!r}'.format(text))
    return match.group(0)

### Constants

#### List of GTEx tissues

In [4]:
# Names of the 48 GTEx tissues, one entry per line.
tissues = [
    'Adipose - Subcutaneous',
    'Adipose - Visceral (Omentum)',
    'Adrenal Gland',
    'Artery - Aorta',
    'Artery - Coronary',
    'Artery - Tibial',
    'Brain - Amygdala',
    'Brain - Anterior cingulate cortex (BA24)',
    'Brain - Caudate (basal ganglia)',
    'Brain - Cerebellar Hemisphere',
    'Brain - Cerebellum',
    'Brain - Cortex',
    'Brain - Frontal Cortex (BA9)',
    'Brain - Hippocampus',
    'Brain - Hypothalamus',
    'Brain - Nucleus accumbens (basal ganglia)',
    'Brain - Putamen (basal ganglia)',
    'Brain - Spinal cord (cervical c-1)',
    'Brain - Substantia nigra',
    'Breast - Mammary Tissue',
    'Cells - EBV-transformed lymphocytes',
    'Cells - Transformed fibroblasts',
    'Colon - Sigmoid',
    'Colon - Transverse',
    'Esophagus - Gastroesophageal Junction',
    'Esophagus - Mucosa',
    'Esophagus - Muscularis',
    'Heart - Atrial Appendage',
    'Heart - Left Ventricle',
    'Liver',
    'Lung',
    'Minor Salivary Gland',
    'Muscle - Skeletal',
    'Nerve - Tibial',
    'Ovary',
    'Pancreas',
    'Pituitary',
    'Prostate',
    'Skin - Not Sun Exposed (Suprapubic)',
    'Skin - Sun Exposed (Lower leg)',
    'Small Intestine - Terminal Ileum',
    'Spleen',
    'Stomach',
    'Testis',
    'Thyroid',
    'Uterus',
    'Vagina',
    'Whole Blood',
]

---

## Analysis

In [5]:
# Load the per-eQTL tissue counts and rename `gene_id` to 'Ensembl Gene ID',
# the key the later merges with the gene-level tables join on.
eQTLsAndNumTissuesAffected = (
    pd.read_csv('../../outputFiles/GTExV7/eQTLsAndNumTissuesAffected.txt')
    .rename(columns={'gene_id': 'Ensembl Gene ID'})
)
eQTLsAndNumTissuesAffected

Unnamed: 0,variant_id,Ensembl Gene ID,countUncorrected,countBonferroniCorrected,countMetasoft
0,10_100000625_A_G_b37,ENSG00000138131,5,4.0,5
1,10_100000625_A_G_b37,ENSG00000166024,4,3.0,9
2,10_100000625_A_G_b37,ENSG00000230928,4,3.0,9
3,10_100000645_A_C_b37,ENSG00000138131,1,1.0,2
4,10_100000645_A_C_b37,ENSG00000230928,11,10.0,29
5,10_100002841_C_CT_b37,ENSG00000230928,2,0.0,14
6,10_100003242_T_G_b37,ENSG00000166024,1,0.0,9
7,10_100003242_T_G_b37,ENSG00000230928,1,0.0,4
8,10_100003785_T_C_b37,ENSG00000138131,6,3.0,5
9,10_100003785_T_C_b37,ENSG00000166024,4,2.0,12


In [6]:
# Summary statistics for the numeric columns of the per-eQTL table.
eQTLsAndNumTissuesAffected.describe()

Unnamed: 0,countUncorrected,countBonferroniCorrected,countMetasoft
count,7627598.0,7627598.0,7627598.0
mean,4.822141,2.978086,16.51631
std,7.782665,6.681433,15.61892
min,1.0,0.0,0.0
25%,1.0,0.0,2.0
50%,2.0,1.0,11.0
75%,4.0,2.0,30.0
max,48.0,48.0,48.0


### Number of tissues affected per eQTL for each gene group

#### Ohnologs

##### Bonferroni-corrected eQTLs

In [7]:
# Gene-level table (tab-separated despite the .csv extension);
# the stored 'Unnamed: 0' column is used as the index.
genesWitheQTLTissueCountBonferroniAndOhnologStatus = pd.read_csv(
    '../../outputFiles/genesWitheQTLTissueCountBonferroniAndOhnologStatus.csv',
    index_col="Unnamed: 0",
    sep="\t",
)
genesWitheQTLTissueCountBonferroniAndOhnologStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,type,propOfExpressedAffectedByeQTL
0,ENSG00000138593,4.0,48,15,49280673,49338760,-1,ohno,0.083333
1,ENSG00000166351,1.0,1,21,14982498,15013906,1,SSD,1.000000
2,ENSG00000168675,1.0,47,18,13217497,13652754,1,SSD,0.021277
3,ENSG00000188992,13.0,30,21,15481134,15583166,-1,SSD,0.433333
4,ENSG00000185272,8.0,48,21,15588451,15600693,1,ohno,0.166667
5,ENSG00000182974,1.0,1,15,22368478,22369561,1,SSD,1.000000
6,ENSG00000155304,1.0,48,21,15743436,15755805,-1,singleton,0.020833
7,ENSG00000153575,8.0,48,15,22833395,22873892,1,SSD,0.166667
8,ENSG00000180530,2.0,48,21,16333556,16437321,-1,singleton,0.041667
9,ENSG00000137824,9.0,48,15,41028082,41048049,-1,ohno,0.187500


In [8]:
# Attach the gene-level columns to every eQTL row (inner join on the gene ID),
# then keep only the rows whose countBonferroniCorrected is above zero.
eQTLsAndNumTissuesAffectedOhno = eQTLsAndNumTissuesAffected.merge(
    genesWitheQTLTissueCountBonferroniAndOhnologStatus,
    on="Ensembl Gene ID",
    how="inner",
)
eQTLsAndNumTissuesAffectedOhnoBonf = eQTLsAndNumTissuesAffectedOhno[
    eQTLsAndNumTissuesAffectedOhno['countBonferroniCorrected'].gt(0)
].copy()
eQTLsAndNumTissuesAffectedOhnoBonf

Unnamed: 0,variant_id,Ensembl Gene ID,countUncorrected,countBonferroniCorrected,countMetasoft,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,type,propOfExpressedAffectedByeQTL
0,10_100000625_A_G_b37,ENSG00000138131,5,4.0,5,8.0,48,10,100007447,100028007,-1,ohno,0.166667
1,10_100000645_A_C_b37,ENSG00000138131,1,1.0,2,8.0,48,10,100007447,100028007,-1,ohno,0.166667
2,10_100003785_T_C_b37,ENSG00000138131,6,3.0,5,8.0,48,10,100007447,100028007,-1,ohno,0.166667
3,10_100004360_G_A_b37,ENSG00000138131,1,1.0,2,8.0,48,10,100007447,100028007,-1,ohno,0.166667
4,10_100004441_G_C_b37,ENSG00000138131,5,3.0,5,8.0,48,10,100007447,100028007,-1,ohno,0.166667
5,10_100004906_C_A_b37,ENSG00000138131,5,5.0,6,8.0,48,10,100007447,100028007,-1,ohno,0.166667
6,10_100004996_G_A_b37,ENSG00000138131,6,3.0,5,8.0,48,10,100007447,100028007,-1,ohno,0.166667
7,10_100005282_C_T_b37,ENSG00000138131,5,5.0,5,8.0,48,10,100007447,100028007,-1,ohno,0.166667
8,10_100007362_G_C_b37,ENSG00000138131,1,1.0,2,8.0,48,10,100007447,100028007,-1,ohno,0.166667
9,10_100008436_G_A_b37,ENSG00000138131,6,3.0,5,8.0,48,10,100007447,100028007,-1,ohno,0.166667


In [9]:
# Per-row ratio countBonferroniCorrected / expressedTissues, stored as 'eQTLAffectedPropExpressed'.
# Assigned directly: pre-creating the column as integer 0 and then filling it through
# `.loc[:, col] = <floats>` relies on silent int -> float upcasting, which pandas >= 2.1
# deprecates (FutureWarning). The resulting float column is the same.
eQTLsAndNumTissuesAffectedOhnoBonf['eQTLAffectedPropExpressed'] = (
    eQTLsAndNumTissuesAffectedOhnoBonf['countBonferroniCorrected']
    / eQTLsAndNumTissuesAffectedOhnoBonf['expressedTissues']
)

In [10]:
# Distribution of countBonferroniCorrected within each value of 'type'.
eQTLsAndNumTissuesAffectedOhnoBonf.groupby('type')['countBonferroniCorrected'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SSD,1127919.0,5.057446,7.907779,1.0,1.0,2.0,5.0,48.0
ohno,969051.0,3.480253,5.312576,1.0,1.0,1.0,3.0,48.0
singleton,925297.0,5.259131,7.596849,1.0,1.0,2.0,6.0,48.0


In [11]:
# Distribution of eQTLAffectedPropExpressed within each value of 'type'.
eQTLsAndNumTissuesAffectedOhnoBonf.groupby('type')['eQTLAffectedPropExpressed'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SSD,1127919.0,0.12611,0.190306,0.020833,0.020833,0.041667,0.125,1.0
ohno,969051.0,0.078424,0.116558,0.020833,0.020833,0.041667,0.083333,1.0
singleton,925297.0,0.121846,0.174095,0.020833,0.020833,0.041667,0.136364,1.0


In [13]:
# Write the filtered per-eQTL table as an xz-compressed CSV, without the index.
eQTLsAndNumTissuesAffectedOhnoBonf.to_csv(
    path_or_buf='../../outputFiles/pereQTLNumTissuesAffected/pereQTLNumTissuesAffectedOhnologsBonferroni.csv.xz',
    compression="xz",
    index=False,
)

##### Metasoft eQTLs

In [14]:
# Gene-level table (tab-separated despite the .csv extension);
# the stored 'Unnamed: 0' column is used as the index.
genesWitheQTLTissueCountMetasoftAndOhnologStatus = pd.read_csv(
    '../../outputFiles/genesWitheQTLTissueCountMetasoftAndOhnologStatus.csv',
    index_col="Unnamed: 0",
    sep="\t",
)
genesWitheQTLTissueCountMetasoftAndOhnologStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,propOfExpressedAffectedByeQTL,type
0,ENSG00000138593,24.0,48,15,49280673,49338760,-1,0.500000,ohno
1,ENSG00000166157,1.0,3,21,10906201,11029719,-1,0.333333,SSD
2,ENSG00000168675,10.0,47,18,13217497,13652754,1,0.212766,SSD
3,ENSG00000188992,30.0,30,21,15481134,15583166,-1,1.000000,SSD
4,ENSG00000185272,48.0,48,21,15588451,15600693,1,1.000000,ohno
5,ENSG00000155304,23.0,48,21,15743436,15755805,-1,0.479167,singleton
6,ENSG00000197414,1.0,4,15,22736246,22746002,1,0.250000,SSD
7,ENSG00000166200,44.0,48,15,49398268,49447858,-1,0.916667,SSD
8,ENSG00000155307,4.0,48,21,15857549,15955723,-1,0.083333,SSD
9,ENSG00000243440,2.0,7,21,15964251,16031142,-1,0.285714,singleton


In [15]:
# Inner-join the eQTL rows with the gene-level table on the gene ID and keep
# only the rows whose countMetasoft is above zero (as an independent copy).
eQTLsAndNumTissuesAffectedOhnoMeta = (
    eQTLsAndNumTissuesAffected
    .merge(genesWitheQTLTissueCountMetasoftAndOhnologStatus, on="Ensembl Gene ID", how="inner")
    .loc[lambda merged: merged['countMetasoft'] > 0]
    .copy()
)
eQTLsAndNumTissuesAffectedOhnoMeta

Unnamed: 0,variant_id,Ensembl Gene ID,countUncorrected,countBonferroniCorrected,countMetasoft,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,propOfExpressedAffectedByeQTL,type
0,10_100000625_A_G_b37,ENSG00000138131,5,4.0,5,20.0,48,10,100007447,100028007,-1,0.416667,ohno
1,10_100000645_A_C_b37,ENSG00000138131,1,1.0,2,20.0,48,10,100007447,100028007,-1,0.416667,ohno
2,10_100003785_T_C_b37,ENSG00000138131,6,3.0,5,20.0,48,10,100007447,100028007,-1,0.416667,ohno
3,10_100004360_G_A_b37,ENSG00000138131,1,1.0,2,20.0,48,10,100007447,100028007,-1,0.416667,ohno
4,10_100004441_G_C_b37,ENSG00000138131,5,3.0,5,20.0,48,10,100007447,100028007,-1,0.416667,ohno
5,10_100004906_C_A_b37,ENSG00000138131,5,5.0,6,20.0,48,10,100007447,100028007,-1,0.416667,ohno
6,10_100004996_G_A_b37,ENSG00000138131,6,3.0,5,20.0,48,10,100007447,100028007,-1,0.416667,ohno
7,10_100005282_C_T_b37,ENSG00000138131,5,5.0,5,20.0,48,10,100007447,100028007,-1,0.416667,ohno
8,10_100007362_G_C_b37,ENSG00000138131,1,1.0,2,20.0,48,10,100007447,100028007,-1,0.416667,ohno
9,10_100008436_G_A_b37,ENSG00000138131,6,3.0,5,20.0,48,10,100007447,100028007,-1,0.416667,ohno


In [16]:
# Per-row ratio countMetasoft / expressedTissues, stored as 'eQTLAffectedPropExpressed'.
# Assigned directly: pre-creating the column as integer 0 and then filling it through
# `.loc[:, col] = <floats>` relies on silent int -> float upcasting, which pandas >= 2.1
# deprecates (FutureWarning). The resulting float column is the same.
eQTLsAndNumTissuesAffectedOhnoMeta['eQTLAffectedPropExpressed'] = (
    eQTLsAndNumTissuesAffectedOhnoMeta['countMetasoft']
    / eQTLsAndNumTissuesAffectedOhnoMeta['expressedTissues']
)

In [17]:
# Distribution of countMetasoft within each value of 'type'.
eQTLsAndNumTissuesAffectedOhnoMeta.groupby('type')['countMetasoft'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SSD,1812158.0,17.543768,15.43565,1.0,3.0,12.0,31.0,48.0
ohno,1611438.0,12.578578,12.979378,1.0,2.0,7.0,20.0,48.0
singleton,1502376.0,19.22579,15.352724,1.0,4.0,16.0,33.0,48.0


In [18]:
# Distribution of eQTLAffectedPropExpressed within each value of 'type'.
eQTLsAndNumTissuesAffectedOhnoMeta.groupby('type')['eQTLAffectedPropExpressed'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SSD,1812158.0,0.392727,0.323986,0.020833,0.083333,0.3125,0.680851,1.0
ohno,1611438.0,0.273098,0.27233,0.020833,0.047619,0.166667,0.4375,1.0
singleton,1502376.0,0.416301,0.318457,0.020833,0.104167,0.358974,0.708333,1.0


In [19]:
# Write the filtered per-eQTL table as an xz-compressed CSV, without the index.
eQTLsAndNumTissuesAffectedOhnoMeta.to_csv(
    path_or_buf='../../outputFiles/pereQTLNumTissuesAffected/pereQTLNumTissuesAffectedOhnologsMetasoft.csv.xz',
    compression="xz",
    index=False,
)

#### CNVs (Zarrei et al. CNV map)

##### Bonferroni-corrected eQTLs

In [20]:
# Gene-level table (tab-separated despite the .csv extension);
# the stored 'Unnamed: 0' column is used as the index.
genesWitheQTLTissueCountBonferroniAndCNVZarreiStatus = pd.read_csv(
    '../../outputFiles/genesWitheQTLTissueCountBonferroniAndCNVZarreiStatus.csv',
    index_col="Unnamed: 0",
    sep="\t",
)
genesWitheQTLTissueCountBonferroniAndCNVZarreiStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,CNV,propOfExpressedAffectedByeQTL
0,ENSG00000138593,4.0,48,15,49280673,49338760,-1,N,0.083333
1,ENSG00000166351,1.0,1,21,14982498,15013906,1,Y,1.000000
2,ENSG00000168675,1.0,47,18,13217497,13652754,1,Y,0.021277
3,ENSG00000188992,13.0,30,21,15481134,15583166,-1,N,0.433333
4,ENSG00000185272,8.0,48,21,15588451,15600693,1,N,0.166667
5,ENSG00000182974,1.0,1,15,22368478,22369561,1,Y,1.000000
6,ENSG00000155304,1.0,48,21,15743436,15755805,-1,N,0.020833
7,ENSG00000153575,8.0,48,15,22833395,22873892,1,Y,0.166667
8,ENSG00000180530,2.0,48,21,16333556,16437321,-1,Y,0.041667
9,ENSG00000137824,9.0,48,15,41028082,41048049,-1,N,0.187500


In [21]:
# Inner-join the eQTL rows with the gene-level table on the gene ID and keep
# only the rows whose countBonferroniCorrected is above zero (as an independent copy).
eQTLsAndNumTissuesAffectedCNVZarreiBonf = (
    eQTLsAndNumTissuesAffected
    .merge(genesWitheQTLTissueCountBonferroniAndCNVZarreiStatus, on="Ensembl Gene ID", how="inner")
    .loc[lambda merged: merged['countBonferroniCorrected'] > 0]
    .copy()
)
eQTLsAndNumTissuesAffectedCNVZarreiBonf

Unnamed: 0,variant_id,Ensembl Gene ID,countUncorrected,countBonferroniCorrected,countMetasoft,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,CNV,propOfExpressedAffectedByeQTL
0,10_100000625_A_G_b37,ENSG00000138131,5,4.0,5,8.0,48,10,100007447,100028007,-1,Y,0.166667
1,10_100000645_A_C_b37,ENSG00000138131,1,1.0,2,8.0,48,10,100007447,100028007,-1,Y,0.166667
2,10_100003785_T_C_b37,ENSG00000138131,6,3.0,5,8.0,48,10,100007447,100028007,-1,Y,0.166667
3,10_100004360_G_A_b37,ENSG00000138131,1,1.0,2,8.0,48,10,100007447,100028007,-1,Y,0.166667
4,10_100004441_G_C_b37,ENSG00000138131,5,3.0,5,8.0,48,10,100007447,100028007,-1,Y,0.166667
5,10_100004906_C_A_b37,ENSG00000138131,5,5.0,6,8.0,48,10,100007447,100028007,-1,Y,0.166667
6,10_100004996_G_A_b37,ENSG00000138131,6,3.0,5,8.0,48,10,100007447,100028007,-1,Y,0.166667
7,10_100005282_C_T_b37,ENSG00000138131,5,5.0,5,8.0,48,10,100007447,100028007,-1,Y,0.166667
8,10_100007362_G_C_b37,ENSG00000138131,1,1.0,2,8.0,48,10,100007447,100028007,-1,Y,0.166667
9,10_100008436_G_A_b37,ENSG00000138131,6,3.0,5,8.0,48,10,100007447,100028007,-1,Y,0.166667


In [22]:
# Per-row ratio countBonferroniCorrected / expressedTissues, stored as 'eQTLAffectedPropExpressed'.
# Assigned directly: pre-creating the column as integer 0 and then filling it through
# `.loc[:, col] = <floats>` relies on silent int -> float upcasting, which pandas >= 2.1
# deprecates (FutureWarning). The resulting float column is the same.
eQTLsAndNumTissuesAffectedCNVZarreiBonf['eQTLAffectedPropExpressed'] = (
    eQTLsAndNumTissuesAffectedCNVZarreiBonf['countBonferroniCorrected']
    / eQTLsAndNumTissuesAffectedCNVZarreiBonf['expressedTissues']
)

In [24]:
# Distribution of countBonferroniCorrected within each value of 'CNV'.
eQTLsAndNumTissuesAffectedCNVZarreiBonf.groupby('CNV')['countBonferroniCorrected'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
CNV,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N,1608568.0,4.394149,6.63004,1.0,1.0,2.0,4.0,48.0
Y,1413699.0,4.86306,7.628158,1.0,1.0,2.0,5.0,48.0


In [25]:
# Distribution of eQTLAffectedPropExpressed within each value of 'CNV'.
eQTLsAndNumTissuesAffectedCNVZarreiBonf.groupby('CNV')['eQTLAffectedPropExpressed'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
CNV,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N,1608568.0,0.10405,0.156651,0.020833,0.020833,0.041667,0.104167,1.0
Y,1413699.0,0.115732,0.176162,0.020833,0.020833,0.041667,0.125,1.0


In [26]:
# Write the filtered per-eQTL table as an xz-compressed CSV, without the index.
eQTLsAndNumTissuesAffectedCNVZarreiBonf.to_csv(
    path_or_buf='../../outputFiles/pereQTLNumTissuesAffected/pereQTLNumTissuesAffectedCNVZarreiBonferroni.csv.xz',
    compression="xz",
    index=False,
)

##### Metasoft eQTLs

In [27]:
# Gene-level table (tab-separated despite the .csv extension);
# the stored 'Unnamed: 0' column is used as the index.
genesWitheQTLTissueCountMetasoftAndCNVZarreiStatus = pd.read_csv(
    '../../outputFiles/genesWitheQTLTissueCountMetasoftAndCNVZarreiStatus.csv',
    index_col="Unnamed: 0",
    sep="\t",
)
genesWitheQTLTissueCountMetasoftAndCNVZarreiStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,propOfExpressedAffectedByeQTL,CNV
0,ENSG00000138593,24.0,48,15,49280673,49338760,-1,0.500000,N
1,ENSG00000166157,1.0,3,21,10906201,11029719,-1,0.333333,Y
2,ENSG00000168675,10.0,47,18,13217497,13652754,1,0.212766,Y
3,ENSG00000188992,30.0,30,21,15481134,15583166,-1,1.000000,N
4,ENSG00000185272,48.0,48,21,15588451,15600693,1,1.000000,N
5,ENSG00000155304,23.0,48,21,15743436,15755805,-1,0.479167,N
6,ENSG00000197414,1.0,4,15,22736246,22746002,1,0.250000,Y
7,ENSG00000166200,44.0,48,15,49398268,49447858,-1,0.916667,N
8,ENSG00000155307,4.0,48,21,15857549,15955723,-1,0.083333,N
9,ENSG00000243440,2.0,7,21,15964251,16031142,-1,0.285714,N


In [28]:
# Inner-join the eQTL rows with the gene-level table on the gene ID and keep
# only the rows whose countMetasoft is above zero (as an independent copy).
eQTLsAndNumTissuesAffectedCNVZarreiMeta = (
    eQTLsAndNumTissuesAffected
    .merge(genesWitheQTLTissueCountMetasoftAndCNVZarreiStatus, on="Ensembl Gene ID", how="inner")
    .loc[lambda merged: merged['countMetasoft'] > 0]
    .copy()
)
eQTLsAndNumTissuesAffectedCNVZarreiMeta

Unnamed: 0,variant_id,Ensembl Gene ID,countUncorrected,countBonferroniCorrected,countMetasoft,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,propOfExpressedAffectedByeQTL,CNV
0,10_100000625_A_G_b37,ENSG00000138131,5,4.0,5,20.0,48,10,100007447,100028007,-1,0.416667,Y
1,10_100000645_A_C_b37,ENSG00000138131,1,1.0,2,20.0,48,10,100007447,100028007,-1,0.416667,Y
2,10_100003785_T_C_b37,ENSG00000138131,6,3.0,5,20.0,48,10,100007447,100028007,-1,0.416667,Y
3,10_100004360_G_A_b37,ENSG00000138131,1,1.0,2,20.0,48,10,100007447,100028007,-1,0.416667,Y
4,10_100004441_G_C_b37,ENSG00000138131,5,3.0,5,20.0,48,10,100007447,100028007,-1,0.416667,Y
5,10_100004906_C_A_b37,ENSG00000138131,5,5.0,6,20.0,48,10,100007447,100028007,-1,0.416667,Y
6,10_100004996_G_A_b37,ENSG00000138131,6,3.0,5,20.0,48,10,100007447,100028007,-1,0.416667,Y
7,10_100005282_C_T_b37,ENSG00000138131,5,5.0,5,20.0,48,10,100007447,100028007,-1,0.416667,Y
8,10_100007362_G_C_b37,ENSG00000138131,1,1.0,2,20.0,48,10,100007447,100028007,-1,0.416667,Y
9,10_100008436_G_A_b37,ENSG00000138131,6,3.0,5,20.0,48,10,100007447,100028007,-1,0.416667,Y


In [29]:
# Per-row ratio countMetasoft / expressedTissues, stored as 'eQTLAffectedPropExpressed'.
# Assigned directly: pre-creating the column as integer 0 and then filling it through
# `.loc[:, col] = <floats>` relies on silent int -> float upcasting, which pandas >= 2.1
# deprecates (FutureWarning). The resulting float column is the same.
eQTLsAndNumTissuesAffectedCNVZarreiMeta['eQTLAffectedPropExpressed'] = (
    eQTLsAndNumTissuesAffectedCNVZarreiMeta['countMetasoft']
    / eQTLsAndNumTissuesAffectedCNVZarreiMeta['expressedTissues']
)

In [30]:
# Distribution of countMetasoft within each value of 'CNV'.
eQTLsAndNumTissuesAffectedCNVZarreiMeta.groupby('CNV')['countMetasoft'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
CNV,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N,2676649.0,16.272304,14.687134,1.0,3.0,11.0,28.0,48.0
Y,2249323.0,16.623134,15.171126,1.0,3.0,11.0,29.0,48.0


In [31]:
# Distribution of eQTLAffectedPropExpressed within each value of 'CNV'.
eQTLsAndNumTissuesAffectedCNVZarreiMeta.groupby('CNV')['eQTLAffectedPropExpressed'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
CNV,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N,2676649.0,0.355149,0.307108,0.020833,0.081081,0.25,0.604167,1.0
Y,2249323.0,0.367486,0.318603,0.020833,0.083333,0.270833,0.645833,1.0


In [32]:
# Write the filtered per-eQTL table as an xz-compressed CSV, without the index.
eQTLsAndNumTissuesAffectedCNVZarreiMeta.to_csv(
    path_or_buf='../../outputFiles/pereQTLNumTissuesAffected/pereQTLNumTissuesAffectedCNVZarreiMetasoft.csv.xz',
    compression="xz",
    index=False,
)

#### CNVs (ExAC CNV data)

##### Bonferroni-corrected eQTLs

In [67]:
# Gene-level table (tab-separated despite the .csv extension);
# the stored 'Unnamed: 0' column is used as the index.
genesWitheQTLTissueCountBonferroniAndCNVExACStatus = pd.read_csv(
    '../../outputFiles/genesWitheQTLTissueCountBonferroniAndCNVExACStatus.csv',
    index_col="Unnamed: 0",
    sep="\t",
)
genesWitheQTLTissueCountBonferroniAndCNVExACStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,gene,chr,start,...,del.sing,dup.sing,del.sing.score,dup.sing.score,del.score,dup.score,cnv.score,flag,CNV,propOfExpressedAffectedByeQTL
0,ENSG00000138593,4.0,48,15,49280673,49338760,-1,ENSG00000138593.4,15,49280673,...,0,0,1.006221,0.976367,1.092999,1.275552,1.523702,0,N,0.083333
1,ENSG00000168675,1.0,47,18,13217497,13652754,1,ENSG00000168675.14,18,13217497,...,0,0,0.620675,0.737930,0.723574,-0.029705,0.297903,0,Y,0.021277
2,ENSG00000185272,8.0,48,21,15588451,15600693,1,ENSG00000185272.9,21,15588451,...,0,0,0.187194,0.266138,0.728772,-1.183084,-0.702303,1,Y,0.166667
3,ENSG00000155304,1.0,48,21,15743436,15755805,-1,ENSG00000155304.4,21,15743436,...,0,0,0.416765,0.407245,0.962883,0.202447,0.548186,0,Y,0.020833
4,ENSG00000180530,2.0,48,21,16333556,16437321,-1,ENSG00000180530.5,21,16333556,...,0,0,0.093782,0.097961,0.577557,0.553032,0.719674,0,N,0.041667
5,ENSG00000137824,9.0,48,15,41028082,41048049,-1,ENSG00000137824.11,15,41028082,...,3,0,-1.590585,0.776270,-0.421097,0.534328,0.192307,0,Y,0.187500
6,ENSG00000137880,1.0,48,15,41056218,41059906,1,ENSG00000137880.4,15,41056218,...,0,0,-0.066143,-0.003908,0.323604,0.019842,0.207774,0,Y,0.020833
7,ENSG00000104129,9.0,48,15,41060067,41099675,-1,ENSG00000104129.5,15,41060067,...,0,0,0.648261,0.707249,0.779957,0.110761,0.418014,0,Y,0.187500
8,ENSG00000177150,4.0,48,18,13663346,13726662,-1,ENSG00000177150.8,18,13663346,...,0,0,0.344554,0.375130,0.364536,-0.110495,0.069348,1,Y,0.083333
9,ENSG00000155313,1.0,48,21,17102344,17252377,1,ENSG00000155313.11,21,17102344,...,1,2,0.642495,0.144497,1.087454,0.740681,0.981162,0,Y,0.020833


In [68]:
# Flag each gene 'Y' when its ID is in broadBonferronieQTLAffectedGenes, otherwise 'N'.
# NOTE(review): broadBonferronieQTLAffectedGenes is not defined in any cell above this one,
# so on a fresh kernel this cell only works if that name is created elsewhere first —
# confirm where it comes from.
genesWitheQTLTissueCountBonferroniAndCNVExACStatus['affectedByBroad'] = (
    genesWitheQTLTissueCountBonferroniAndCNVExACStatus['Ensembl Gene ID']
    .isin(broadBonferronieQTLAffectedGenes)
    .map({True: 'Y', False: 'N'})
)
genesWitheQTLTissueCountBonferroniAndCNVExACStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,gene,chr,start,...,dup.sing,del.sing.score,dup.sing.score,del.score,dup.score,cnv.score,flag,CNV,propOfExpressedAffectedByeQTL,affectedByBroad
0,ENSG00000138593,4.0,48,15,49280673,49338760,-1,ENSG00000138593.4,15,49280673,...,0,1.006221,0.976367,1.092999,1.275552,1.523702,0,N,0.083333,N
1,ENSG00000168675,1.0,47,18,13217497,13652754,1,ENSG00000168675.14,18,13217497,...,0,0.620675,0.737930,0.723574,-0.029705,0.297903,0,Y,0.021277,N
2,ENSG00000185272,8.0,48,21,15588451,15600693,1,ENSG00000185272.9,21,15588451,...,0,0.187194,0.266138,0.728772,-1.183084,-0.702303,1,Y,0.166667,N
3,ENSG00000155304,1.0,48,21,15743436,15755805,-1,ENSG00000155304.4,21,15743436,...,0,0.416765,0.407245,0.962883,0.202447,0.548186,0,Y,0.020833,N
4,ENSG00000180530,2.0,48,21,16333556,16437321,-1,ENSG00000180530.5,21,16333556,...,0,0.093782,0.097961,0.577557,0.553032,0.719674,0,N,0.041667,N
5,ENSG00000137824,9.0,48,15,41028082,41048049,-1,ENSG00000137824.11,15,41028082,...,0,-1.590585,0.776270,-0.421097,0.534328,0.192307,0,Y,0.187500,N
6,ENSG00000137880,1.0,48,15,41056218,41059906,1,ENSG00000137880.4,15,41056218,...,0,-0.066143,-0.003908,0.323604,0.019842,0.207774,0,Y,0.020833,N
7,ENSG00000104129,9.0,48,15,41060067,41099675,-1,ENSG00000104129.5,15,41060067,...,0,0.648261,0.707249,0.779957,0.110761,0.418014,0,Y,0.187500,N
8,ENSG00000177150,4.0,48,18,13663346,13726662,-1,ENSG00000177150.8,18,13663346,...,0,0.344554,0.375130,0.364536,-0.110495,0.069348,1,Y,0.083333,N
9,ENSG00000155313,1.0,48,21,17102344,17252377,1,ENSG00000155313.11,21,17102344,...,2,0.642495,0.144497,1.087454,0.740681,0.981162,0,Y,0.020833,N


In [69]:
# Distribution of affectedTissues for genes flagged 'Y' vs 'N' in affectedByBroad.
genesWitheQTLTissueCountBonferroniAndCNVExACStatus.groupby('affectedByBroad')['affectedTissues'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
affectedByBroad,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N,13120.0,4.570503,4.650879,0.0,1.0,3.0,7.0,28.0
Y,2030.0,27.96601,8.670937,14.0,21.0,26.0,33.0,48.0


In [70]:
# Keep genes with affectedTissues >= 14, then summarise affectedTissues for every
# (CNV, affectedByBroad) combination.
# NOTE(review): 14 is a bare literal here — presumably the cutoff that defines a "broad"
# eQTL for the Bonferroni counts; confirm and consider naming it as a constant.
genesWitheQTLTissueCountBonferroniAndCNVExACStatus[genesWitheQTLTissueCountBonferroniAndCNVExACStatus['affectedTissues'] >= 14].groupby(['CNV','affectedByBroad'])['affectedTissues'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
CNV,affectedByBroad,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
N,N,67.0,15.701493,2.09628,14.0,14.0,15.0,16.0,23.0
N,Y,165.0,26.806061,8.580961,14.0,20.0,25.0,32.0,48.0
Y,N,786.0,16.192112,2.309215,14.0,14.0,16.0,17.0,28.0
Y,Y,1865.0,28.068633,8.673662,14.0,21.0,26.0,34.0,48.0


In [71]:
# Export the genes with affectedTissues >= 14 as CSV, without the index.
genesWitheQTLTissueCountBonferroniAndCNVExACStatus.loc[
    genesWitheQTLTissueCountBonferroniAndCNVExACStatus['affectedTissues'].ge(14)
].to_csv(
    path_or_buf='../../outputFiles/eQTLsBroadBreadth/affectedByBroadBonferronieQTLsCNVExAC.csv',
    index=False,
)

##### Metasoft eQTLs

In [72]:
# Gene-level table (tab-separated despite the .csv extension);
# the stored 'Unnamed: 0' column is used as the index.
genesWitheQTLTissueCountMetasoftAndCNVExACStatus = pd.read_csv(
    '../../outputFiles/genesWitheQTLTissueCountMetasoftAndCNVExACStatus.csv',
    index_col="Unnamed: 0",
    sep="\t",
)
genesWitheQTLTissueCountMetasoftAndCNVExACStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,gene,chr,start,...,del.sing,dup.sing,del.sing.score,dup.sing.score,del.score,dup.score,cnv.score,flag,CNV,propOfExpressedAffectedByeQTL
0,ENSG00000138593,24.0,48,15,49280673,49338760,-1,ENSG00000138593.4,15,49280673,...,0,0,1.006221,0.976367,1.092999,1.275552,1.523702,0,N,0.500000
1,ENSG00000168675,10.0,47,18,13217497,13652754,1,ENSG00000168675.14,18,13217497,...,0,0,0.620675,0.737930,0.723574,-0.029705,0.297903,0,Y,0.212766
2,ENSG00000185272,48.0,48,21,15588451,15600693,1,ENSG00000185272.9,21,15588451,...,0,0,0.187194,0.266138,0.728772,-1.183084,-0.702303,1,Y,1.000000
3,ENSG00000155304,23.0,48,21,15743436,15755805,-1,ENSG00000155304.4,21,15743436,...,0,0,0.416765,0.407245,0.962883,0.202447,0.548186,0,Y,0.479167
4,ENSG00000166200,44.0,48,15,49398268,49447858,-1,ENSG00000166200.10,15,49398268,...,0,0,0.888225,0.898726,0.989254,1.167378,1.407464,0,N,0.916667
5,ENSG00000155307,4.0,48,21,15857549,15955723,-1,ENSG00000155307.13,21,15857549,...,1,1,-0.156759,0.002268,0.617652,-0.134204,0.146159,1,Y,0.083333
6,ENSG00000180530,12.0,48,21,16333556,16437321,-1,ENSG00000180530.5,21,16333556,...,0,0,0.093782,0.097961,0.577557,0.553032,0.719674,0,N,0.250000
7,ENSG00000137824,47.0,48,15,41028082,41048049,-1,ENSG00000137824.11,15,41028082,...,3,0,-1.590585,0.776270,-0.421097,0.534328,0.192307,0,Y,0.979167
8,ENSG00000137880,3.0,48,15,41056218,41059906,1,ENSG00000137880.4,15,41056218,...,0,0,-0.066143,-0.003908,0.323604,0.019842,0.207774,0,Y,0.062500
9,ENSG00000104129,48.0,48,15,41060067,41099675,-1,ENSG00000104129.5,15,41060067,...,0,0,0.648261,0.707249,0.779957,0.110761,0.418014,0,Y,1.000000


In [73]:
# Flag each gene 'Y' when its ID is in broadMetasofteQTLAffectedGenes, otherwise 'N'.
# NOTE(review): broadMetasofteQTLAffectedGenes is not defined in any cell above this one,
# so on a fresh kernel this cell only works if that name is created elsewhere first —
# confirm where it comes from.
genesWitheQTLTissueCountMetasoftAndCNVExACStatus['affectedByBroad'] = (
    genesWitheQTLTissueCountMetasoftAndCNVExACStatus['Ensembl Gene ID']
    .isin(broadMetasofteQTLAffectedGenes)
    .map({True: 'Y', False: 'N'})
)
genesWitheQTLTissueCountMetasoftAndCNVExACStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,gene,chr,start,...,dup.sing,del.sing.score,dup.sing.score,del.score,dup.score,cnv.score,flag,CNV,propOfExpressedAffectedByeQTL,affectedByBroad
0,ENSG00000138593,24.0,48,15,49280673,49338760,-1,ENSG00000138593.4,15,49280673,...,0,1.006221,0.976367,1.092999,1.275552,1.523702,0,N,0.500000,N
1,ENSG00000168675,10.0,47,18,13217497,13652754,1,ENSG00000168675.14,18,13217497,...,0,0.620675,0.737930,0.723574,-0.029705,0.297903,0,Y,0.212766,N
2,ENSG00000185272,48.0,48,21,15588451,15600693,1,ENSG00000185272.9,21,15588451,...,0,0.187194,0.266138,0.728772,-1.183084,-0.702303,1,Y,1.000000,N
3,ENSG00000155304,23.0,48,21,15743436,15755805,-1,ENSG00000155304.4,21,15743436,...,0,0.416765,0.407245,0.962883,0.202447,0.548186,0,Y,0.479167,N
4,ENSG00000166200,44.0,48,15,49398268,49447858,-1,ENSG00000166200.10,15,49398268,...,0,0.888225,0.898726,0.989254,1.167378,1.407464,0,N,0.916667,N
5,ENSG00000155307,4.0,48,21,15857549,15955723,-1,ENSG00000155307.13,21,15857549,...,1,-0.156759,0.002268,0.617652,-0.134204,0.146159,1,Y,0.083333,N
6,ENSG00000180530,12.0,48,21,16333556,16437321,-1,ENSG00000180530.5,21,16333556,...,0,0.093782,0.097961,0.577557,0.553032,0.719674,0,N,0.250000,N
7,ENSG00000137824,47.0,48,15,41028082,41048049,-1,ENSG00000137824.11,15,41028082,...,0,-1.590585,0.776270,-0.421097,0.534328,0.192307,0,Y,0.979167,N
8,ENSG00000137880,3.0,48,15,41056218,41059906,1,ENSG00000137880.4,15,41056218,...,0,-0.066143,-0.003908,0.323604,0.019842,0.207774,0,Y,0.062500,N
9,ENSG00000104129,48.0,48,15,41060067,41099675,-1,ENSG00000104129.5,15,41060067,...,0,0.648261,0.707249,0.779957,0.110761,0.418014,0,Y,1.000000,Y


In [74]:
# Distribution of affectedTissues for genes flagged 'Y' vs 'N' in affectedByBroad.
genesWitheQTLTissueCountMetasoftAndCNVExACStatus.groupby('affectedByBroad')['affectedTissues'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
affectedByBroad,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N,11843.0,25.041459,16.859926,0.0,8.0,26.0,42.0,48.0
Y,3307.0,47.784397,0.620109,43.0,48.0,48.0,48.0,48.0


In [75]:
# Keep genes with affectedTissues >= 43, then summarise affectedTissues for every
# (CNV, affectedByBroad) combination.
# NOTE(review): 43 is a bare literal here — presumably the cutoff that defines a "broad"
# eQTL for the Metasoft counts; confirm and consider naming it as a constant.
genesWitheQTLTissueCountMetasoftAndCNVExACStatus[genesWitheQTLTissueCountMetasoftAndCNVExACStatus['affectedTissues'] >= 43].groupby(['CNV','affectedByBroad'])['affectedTissues'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
CNV,affectedByBroad,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
N,N,284.0,45.93662,1.703104,43.0,44.0,46.0,47.0,48.0
N,Y,306.0,47.728758,0.711698,44.0,48.0,48.0,48.0,48.0
Y,N,2515.0,46.093837,1.659554,43.0,45.0,46.0,48.0,48.0
Y,Y,3001.0,47.79007,0.609848,43.0,48.0,48.0,48.0,48.0


In [76]:
# Export the genes with affectedTissues >= 43 as CSV, without the index.
genesWitheQTLTissueCountMetasoftAndCNVExACStatus.loc[
    genesWitheQTLTissueCountMetasoftAndCNVExACStatus['affectedTissues'].ge(43)
].to_csv(
    path_or_buf='../../outputFiles/eQTLsBroadBreadth/affectedByBroadMetasofteQTLsCNVExAC.csv',
    index=False,
)

#### Haploinsufficient genes

##### Bonferroni-corrected eQTLs

In [77]:
# Gene-level table (tab-separated despite the .csv extension);
# the stored 'Unnamed: 0' column is used as the index.
genesWitheQTLTissueCountBonferroniAndHaploStatus = pd.read_csv(
    '../../outputFiles/genesWitheQTLTissueCountBonferroniAndHaploStatus.csv',
    index_col="Unnamed: 0",
    sep="\t",
)
genesWitheQTLTissueCountBonferroniAndHaploStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,pLI,haplo,propOfExpressedAffectedByeQTL
0,ENSG00000138593,4.0,48,15,49280673,49338760,-1,1.017131e-01,N,0.083333
1,ENSG00000166351,1.0,1,21,14982498,15013906,1,7.366598e-01,N,1.000000
2,ENSG00000168675,1.0,47,18,13217497,13652754,1,3.773064e-01,N,0.021277
3,ENSG00000188992,13.0,30,21,15481134,15583166,-1,8.825424e-09,N,0.433333
4,ENSG00000185272,8.0,48,21,15588451,15600693,1,2.910155e-05,N,0.166667
5,ENSG00000155304,1.0,48,21,15743436,15755805,-1,1.886370e-03,N,0.020833
6,ENSG00000153575,8.0,48,15,22833395,22873892,1,1.261972e-04,N,0.166667
7,ENSG00000180530,2.0,48,21,16333556,16437321,-1,9.911722e-01,Y,0.041667
8,ENSG00000137824,9.0,48,15,41028082,41048049,-1,2.377960e-02,N,0.187500
9,ENSG00000137880,1.0,48,15,41056218,41059906,1,4.189688e-02,N,0.020833


In [79]:
# Flag each gene 'Y' when its ID is in broadBonferronieQTLAffectedGenes, otherwise 'N'.
# NOTE(review): broadBonferronieQTLAffectedGenes is not defined in any cell above this one,
# so on a fresh kernel this cell only works if that name is created elsewhere first —
# confirm where it comes from.
genesWitheQTLTissueCountBonferroniAndHaploStatus['affectedByBroad'] = (
    genesWitheQTLTissueCountBonferroniAndHaploStatus['Ensembl Gene ID']
    .isin(broadBonferronieQTLAffectedGenes)
    .map({True: 'Y', False: 'N'})
)
genesWitheQTLTissueCountBonferroniAndHaploStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,pLI,haplo,propOfExpressedAffectedByeQTL,affectedByBroad
0,ENSG00000138593,4.0,48,15,49280673,49338760,-1,1.017131e-01,N,0.083333,N
1,ENSG00000166351,1.0,1,21,14982498,15013906,1,7.366598e-01,N,1.000000,N
2,ENSG00000168675,1.0,47,18,13217497,13652754,1,3.773064e-01,N,0.021277,N
3,ENSG00000188992,13.0,30,21,15481134,15583166,-1,8.825424e-09,N,0.433333,N
4,ENSG00000185272,8.0,48,21,15588451,15600693,1,2.910155e-05,N,0.166667,N
5,ENSG00000155304,1.0,48,21,15743436,15755805,-1,1.886370e-03,N,0.020833,N
6,ENSG00000153575,8.0,48,15,22833395,22873892,1,1.261972e-04,N,0.166667,N
7,ENSG00000180530,2.0,48,21,16333556,16437321,-1,9.911722e-01,Y,0.041667,N
8,ENSG00000137824,9.0,48,15,41028082,41048049,-1,2.377960e-02,N,0.187500,N
9,ENSG00000137880,1.0,48,15,41056218,41059906,1,4.189688e-02,N,0.020833,N


In [80]:
# Distribution of affectedTissues for genes flagged 'Y' vs 'N' in affectedByBroad.
genesWitheQTLTissueCountBonferroniAndHaploStatus.groupby('affectedByBroad')['affectedTissues'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
affectedByBroad,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N,14675.0,4.573492,4.681046,0.0,1.0,3.0,7.0,28.0
Y,2370.0,28.361603,8.913948,14.0,21.0,27.0,34.0,48.0


In [81]:
# Keep genes with at least 14 affected tissues, then summarise affectedTissues
# for every (haplo, affectedByBroad) combination.
# NOTE(review): 14 matches the minimum affectedTissues of the 'Y' group in the
# preceding describe() output; presumably chosen for that reason - confirm.
(
    genesWitheQTLTissueCountBonferroniAndHaploStatus
    .loc[genesWitheQTLTissueCountBonferroniAndHaploStatus['affectedTissues'].ge(14)]
    .groupby(['haplo', 'affectedByBroad'])['affectedTissues']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
haplo,affectedByBroad,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
N,N,873.0,16.290951,2.378733,14.0,14.0,16.0,17.0,28.0
N,Y,2155.0,28.600464,8.934794,14.0,21.0,27.0,34.0,48.0
Y,N,95.0,15.2,1.554677,14.0,14.0,15.0,16.0,22.0
Y,Y,215.0,25.967442,8.351723,14.0,19.0,24.0,31.0,48.0


In [82]:
# Write the genes with at least 14 affected tissues to CSV (row index omitted).
# Both 'Y' and 'N' affectedByBroad rows are kept; only the tissue count filters.
(
    genesWitheQTLTissueCountBonferroniAndHaploStatus
    .loc[genesWitheQTLTissueCountBonferroniAndHaploStatus['affectedTissues'].ge(14)]
    .to_csv(
        '../../outputFiles/eQTLsBroadBreadth/affectedByBroadBonferronieQTLsHaplo.csv',
        index=False,
    )
)

##### Metasoft eQTLs

In [83]:
# Load the Metasoft per-gene table. The file is tab-separated despite its .csv
# extension, and its "Unnamed: 0" column is used as the row index.
genesWitheQTLTissueCountMetasoftAndHaploStatus = pd.read_csv(
    '../../outputFiles/genesWitheQTLTissueCountMetasoftAndHaploStatus.csv',
    sep="\t",
    index_col="Unnamed: 0",
)
genesWitheQTLTissueCountMetasoftAndHaploStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,pLI,haplo,propOfExpressedAffectedByeQTL
0,ENSG00000138593,24.0,48,15,49280673,49338760,-1,1.017131e-01,N,0.500000
1,ENSG00000166157,1.0,3,21,10906201,11029719,-1,5.114037e-31,N,0.333333
2,ENSG00000168675,10.0,47,18,13217497,13652754,1,3.773064e-01,N,0.212766
3,ENSG00000188992,30.0,30,21,15481134,15583166,-1,8.825424e-09,N,1.000000
4,ENSG00000185272,48.0,48,21,15588451,15600693,1,2.910155e-05,N,1.000000
5,ENSG00000155304,23.0,48,21,15743436,15755805,-1,1.886370e-03,N,0.479167
6,ENSG00000166200,44.0,48,15,49398268,49447858,-1,9.995728e-01,Y,0.916667
7,ENSG00000155307,4.0,48,21,15857549,15955723,-1,7.560236e-02,N,0.083333
8,ENSG00000153575,48.0,48,15,22833395,22873892,1,1.261972e-04,N,1.000000
9,ENSG00000180530,12.0,48,21,16333556,16437321,-1,9.911722e-01,Y,0.250000


In [84]:
# Label every gene by membership in broadMetasofteQTLAffectedGenes:
# 'Y' when its Ensembl Gene ID is in that collection, 'N' otherwise.
genesWitheQTLTissueCountMetasoftAndHaploStatus['affectedByBroad'] = (
    genesWitheQTLTissueCountMetasoftAndHaploStatus['Ensembl Gene ID']
    .isin(broadMetasofteQTLAffectedGenes)
    .map({True: 'Y', False: 'N'})
)
genesWitheQTLTissueCountMetasoftAndHaploStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,pLI,haplo,propOfExpressedAffectedByeQTL,affectedByBroad
0,ENSG00000138593,24.0,48,15,49280673,49338760,-1,1.017131e-01,N,0.500000,N
1,ENSG00000166157,1.0,3,21,10906201,11029719,-1,5.114037e-31,N,0.333333,N
2,ENSG00000168675,10.0,47,18,13217497,13652754,1,3.773064e-01,N,0.212766,N
3,ENSG00000188992,30.0,30,21,15481134,15583166,-1,8.825424e-09,N,1.000000,N
4,ENSG00000185272,48.0,48,21,15588451,15600693,1,2.910155e-05,N,1.000000,N
5,ENSG00000155304,23.0,48,21,15743436,15755805,-1,1.886370e-03,N,0.479167,N
6,ENSG00000166200,44.0,48,15,49398268,49447858,-1,9.995728e-01,Y,0.916667,N
7,ENSG00000155307,4.0,48,21,15857549,15955723,-1,7.560236e-02,N,0.083333,N
8,ENSG00000153575,48.0,48,15,22833395,22873892,1,1.261972e-04,N,1.000000,Y
9,ENSG00000180530,12.0,48,21,16333556,16437321,-1,9.911722e-01,Y,0.250000,N


In [85]:
# Summary statistics of affectedTissues for each value of the affectedByBroad flag.
(
    genesWitheQTLTissueCountMetasoftAndHaploStatus
    .groupby('affectedByBroad')['affectedTissues']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
affectedByBroad,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N,13254.0,24.821639,16.924982,0.0,8.0,26.0,42.0,48.0
Y,3791.0,47.772092,0.64138,43.0,48.0,48.0,48.0,48.0


In [86]:
# Keep genes with at least 43 affected tissues, then summarise affectedTissues
# for every (haplo, affectedByBroad) combination.
# NOTE(review): 43 matches the minimum affectedTissues of the 'Y' group in the
# preceding describe() output; presumably chosen for that reason - confirm.
(
    genesWitheQTLTissueCountMetasoftAndHaploStatus
    .loc[genesWitheQTLTissueCountMetasoftAndHaploStatus['affectedTissues'].ge(43)]
    .groupby(['haplo', 'affectedByBroad'])['affectedTissues']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
haplo,affectedByBroad,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
N,N,2566.0,46.106781,1.666636,43.0,45.0,46.0,48.0,48.0
N,Y,3334.0,47.775645,0.644435,43.0,48.0,48.0,48.0,48.0
Y,N,517.0,45.943907,1.686341,43.0,45.0,46.0,47.0,48.0
Y,Y,457.0,47.746171,0.618707,44.0,48.0,48.0,48.0,48.0


In [87]:
# Write the genes with at least 43 affected tissues to CSV (row index omitted).
# Both 'Y' and 'N' affectedByBroad rows are kept; only the tissue count filters.
(
    genesWitheQTLTissueCountMetasoftAndHaploStatus
    .loc[genesWitheQTLTissueCountMetasoftAndHaploStatus['affectedTissues'].ge(43)]
    .to_csv(
        '../../outputFiles/eQTLsBroadBreadth/affectedByBroadMetasofteQTLsHaplo.csv',
        index=False,
    )
)

#### Conserved copy number genes

##### Bonferroni-corrected eQTLs

In [88]:
# Load the Bonferroni per-gene table with the CCN column. The file is
# tab-separated despite its .csv extension, and its "Unnamed: 0" column is
# used as the row index.
genesWitheQTLTissueCountBonferroniAndCCNStatus = pd.read_csv(
    '../../outputFiles/genesWitheQTLTissueCountBonferroniAndCCNStatus.csv',
    sep="\t",
    index_col="Unnamed: 0",
)
genesWitheQTLTissueCountBonferroniAndCCNStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,CCN,propOfExpressedAffectedByeQTL
0,ENSG00000138593,4.0,48,15,49280673,49338760,-1,Y,0.083333
1,ENSG00000166351,1.0,1,21,14982498,15013906,1,N,1.000000
2,ENSG00000168675,1.0,47,18,13217497,13652754,1,Y,0.021277
3,ENSG00000188992,13.0,30,21,15481134,15583166,-1,N,0.433333
4,ENSG00000185272,8.0,48,21,15588451,15600693,1,N,0.166667
5,ENSG00000182974,1.0,1,15,22368478,22369561,1,N,1.000000
6,ENSG00000155304,1.0,48,21,15743436,15755805,-1,Y,0.020833
7,ENSG00000153575,8.0,48,15,22833395,22873892,1,Y,0.166667
8,ENSG00000180530,2.0,48,21,16333556,16437321,-1,Y,0.041667
9,ENSG00000137824,9.0,48,15,41028082,41048049,-1,N,0.187500


In [90]:
# Label every gene by membership in broadBonferronieQTLAffectedGenes:
# 'Y' when its Ensembl Gene ID is in that collection, 'N' otherwise.
genesWitheQTLTissueCountBonferroniAndCCNStatus['affectedByBroad'] = (
    genesWitheQTLTissueCountBonferroniAndCCNStatus['Ensembl Gene ID']
    .isin(broadBonferronieQTLAffectedGenes)
    .map({True: 'Y', False: 'N'})
)
genesWitheQTLTissueCountBonferroniAndCCNStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,CCN,propOfExpressedAffectedByeQTL,affectedByBroad
0,ENSG00000138593,4.0,48,15,49280673,49338760,-1,Y,0.083333,N
1,ENSG00000166351,1.0,1,21,14982498,15013906,1,N,1.000000,N
2,ENSG00000168675,1.0,47,18,13217497,13652754,1,Y,0.021277,N
3,ENSG00000188992,13.0,30,21,15481134,15583166,-1,N,0.433333,N
4,ENSG00000185272,8.0,48,21,15588451,15600693,1,N,0.166667,N
5,ENSG00000182974,1.0,1,15,22368478,22369561,1,N,1.000000,N
6,ENSG00000155304,1.0,48,21,15743436,15755805,-1,Y,0.020833,N
7,ENSG00000153575,8.0,48,15,22833395,22873892,1,Y,0.166667,N
8,ENSG00000180530,2.0,48,21,16333556,16437321,-1,Y,0.041667,N
9,ENSG00000137824,9.0,48,15,41028082,41048049,-1,N,0.187500,N


In [91]:
# Summary statistics of affectedTissues for each value of the affectedByBroad flag.
(
    genesWitheQTLTissueCountBonferroniAndCCNStatus
    .groupby('affectedByBroad')['affectedTissues']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
affectedByBroad,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N,15890.0,4.484456,4.658781,0.0,1.0,3.0,7.0,28.0
Y,2512.0,28.428742,8.977898,14.0,21.0,27.0,34.0,48.0


In [92]:
# Keep genes with at least 14 affected tissues, then summarise affectedTissues
# for every (CCN, affectedByBroad) combination.
# NOTE(review): 14 matches the minimum affectedTissues of the 'Y' group in the
# preceding describe() output; presumably chosen for that reason - confirm.
(
    genesWitheQTLTissueCountBonferroniAndCCNStatus
    .loc[genesWitheQTLTissueCountBonferroniAndCCNStatus['affectedTissues'].ge(14)]
    .groupby(['CCN', 'affectedByBroad'])['affectedTissues']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
CCN,affectedByBroad,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
N,N,615.0,16.19187,2.346721,14.0,14.0,16.0,17.0,27.0
N,Y,1666.0,28.848139,9.167136,14.0,21.0,27.0,35.0,48.0
Y,N,400.0,16.18,2.349767,14.0,14.0,16.0,17.0,28.0
Y,Y,846.0,27.602837,8.538292,14.0,21.0,26.0,33.0,48.0


In [93]:
# Write the genes with at least 14 affected tissues to CSV (row index omitted).
# Both 'Y' and 'N' affectedByBroad rows are kept; only the tissue count filters.
(
    genesWitheQTLTissueCountBonferroniAndCCNStatus
    .loc[genesWitheQTLTissueCountBonferroniAndCCNStatus['affectedTissues'].ge(14)]
    .to_csv(
        '../../outputFiles/eQTLsBroadBreadth/affectedByBroadBonferronieQTLsCCN.csv',
        index=False,
    )
)

##### Metasoft eQTLs

In [94]:
# Load the Metasoft per-gene table with the CCN column. The file is
# tab-separated despite its .csv extension, and its "Unnamed: 0" column is
# used as the row index.
genesWitheQTLTissueCountMetasoftAndCCNStatus = pd.read_csv(
    '../../outputFiles/genesWitheQTLTissueCountMetasoftAndCCNStatus.csv',
    sep="\t",
    index_col="Unnamed: 0",
)
genesWitheQTLTissueCountMetasoftAndCCNStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,CCN,propOfExpressedAffectedByeQTL
0,ENSG00000138593,24.0,48,15,49280673,49338760,-1,Y,0.500000
1,ENSG00000166157,1.0,3,21,10906201,11029719,-1,N,0.333333
2,ENSG00000168675,10.0,47,18,13217497,13652754,1,Y,0.212766
3,ENSG00000188992,30.0,30,21,15481134,15583166,-1,N,1.000000
4,ENSG00000185272,48.0,48,21,15588451,15600693,1,N,1.000000
5,ENSG00000155304,23.0,48,21,15743436,15755805,-1,Y,0.479167
6,ENSG00000197414,1.0,4,15,22736246,22746002,1,N,0.250000
7,ENSG00000166200,44.0,48,15,49398268,49447858,-1,N,0.916667
8,ENSG00000155307,4.0,48,21,15857549,15955723,-1,Y,0.083333
9,ENSG00000243440,2.0,7,21,15964251,16031142,-1,N,0.285714


In [95]:
# Label every gene by membership in broadMetasofteQTLAffectedGenes:
# 'Y' when its Ensembl Gene ID is in that collection, 'N' otherwise.
genesWitheQTLTissueCountMetasoftAndCCNStatus['affectedByBroad'] = (
    genesWitheQTLTissueCountMetasoftAndCCNStatus['Ensembl Gene ID']
    .isin(broadMetasofteQTLAffectedGenes)
    .map({True: 'Y', False: 'N'})
)
genesWitheQTLTissueCountMetasoftAndCCNStatus

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,CCN,propOfExpressedAffectedByeQTL,affectedByBroad
0,ENSG00000138593,24.0,48,15,49280673,49338760,-1,Y,0.500000,N
1,ENSG00000166157,1.0,3,21,10906201,11029719,-1,N,0.333333,N
2,ENSG00000168675,10.0,47,18,13217497,13652754,1,Y,0.212766,N
3,ENSG00000188992,30.0,30,21,15481134,15583166,-1,N,1.000000,N
4,ENSG00000185272,48.0,48,21,15588451,15600693,1,N,1.000000,N
5,ENSG00000155304,23.0,48,21,15743436,15755805,-1,Y,0.479167,N
6,ENSG00000197414,1.0,4,15,22736246,22746002,1,N,0.250000,N
7,ENSG00000166200,44.0,48,15,49398268,49447858,-1,N,0.916667,N
8,ENSG00000155307,4.0,48,21,15857549,15955723,-1,Y,0.083333,N
9,ENSG00000243440,2.0,7,21,15964251,16031142,-1,N,0.285714,N


In [96]:
# Summary statistics of affectedTissues for each value of the affectedByBroad flag.
(
    genesWitheQTLTissueCountMetasoftAndCCNStatus
    .groupby('affectedByBroad')['affectedTissues']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
affectedByBroad,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N,14397.0,24.30555,17.010222,0.0,7.0,25.0,41.0,48.0
Y,4005.0,47.760799,0.670643,43.0,48.0,48.0,48.0,48.0


In [97]:
# Keep genes with at least 43 affected tissues, then summarise affectedTissues
# for every (CCN, affectedByBroad) combination.
# NOTE(review): 43 matches the minimum affectedTissues of the 'Y' group in the
# preceding describe() output; presumably chosen for that reason - confirm.
(
    genesWitheQTLTissueCountMetasoftAndCCNStatus
    .loc[genesWitheQTLTissueCountMetasoftAndCCNStatus['affectedTissues'].ge(43)]
    .groupby(['CCN', 'affectedByBroad'])['affectedTissues']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
CCN,affectedByBroad,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
N,N,1938.0,46.058308,1.674679,43.0,45.0,46.0,48.0,48.0
N,Y,2613.0,47.741294,0.714409,43.0,48.0,48.0,48.0,48.0
Y,N,1298.0,46.09245,1.657884,43.0,45.0,46.0,48.0,48.0
Y,Y,1392.0,47.797414,0.578099,43.0,48.0,48.0,48.0,48.0


In [98]:
# Write the genes with at least 43 affected tissues to CSV (row index omitted).
# Both 'Y' and 'N' affectedByBroad rows are kept; only the tissue count filters.
(
    genesWitheQTLTissueCountMetasoftAndCCNStatus
    .loc[genesWitheQTLTissueCountMetasoftAndCCNStatus['affectedTissues'].ge(43)]
    .to_csv(
        '../../outputFiles/eQTLsBroadBreadth/affectedByBroadMetasofteQTLsCCN.csv',
        index=False,
    )
)

### Enrichment of gene groups for broad tissue breadth eQTLs

#### Ohnologs

##### Bonferroni-corrected eQTLs

In [100]:
# Preview the first five rows of the per-variant tissue-count table.
eQTLsAndNumTissuesAffected.head(5)

Unnamed: 0,variant_id,gene_id,countUncorrected,countBonferroniCorrected,countMetasoft
0,10_100000625_A_G_b37,ENSG00000138131,5,4.0,5
1,10_100000625_A_G_b37,ENSG00000166024,4,3.0,9
2,10_100000625_A_G_b37,ENSG00000230928,4,3.0,9
3,10_100000645_A_C_b37,ENSG00000138131,1,1.0,2
4,10_100000645_A_C_b37,ENSG00000230928,11,10.0,29


In [104]:
# Attach the per-gene columns to every eQTL row of the same gene.
# This is an inner join (the merge default): rows without a matching gene ID
# on the other side are dropped. Both key columns ("Ensembl Gene ID" and
# "gene_id") are kept in the result.
eQTLsAndNumTissuesAffectedOhnolog = genesWitheQTLTissueCountBonferroniAndOhnologStatus.merge(
    eQTLsAndNumTissuesAffected,
    left_on="Ensembl Gene ID",
    right_on="gene_id",
)
eQTLsAndNumTissuesAffectedOhnolog.head(5)

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,type,propOfExpressedAffectedByeQTL,affectedByBroad,variant_id,gene_id,countUncorrected,countBonferroniCorrected,countMetasoft
0,ENSG00000138593,4.0,48,15,49280673,49338760,-1,ohno,0.083333,N,15_48536530_A_G_b37,ENSG00000138593,1,0.0,1
1,ENSG00000138593,4.0,48,15,49280673,49338760,-1,ohno,0.083333,N,15_48595192_G_T_b37,ENSG00000138593,1,0.0,1
2,ENSG00000138593,4.0,48,15,49280673,49338760,-1,ohno,0.083333,N,15_48595366_A_G_b37,ENSG00000138593,1,0.0,1
3,ENSG00000138593,4.0,48,15,49280673,49338760,-1,ohno,0.083333,N,15_48597302_A_G_b37,ENSG00000138593,1,0.0,1
4,ENSG00000138593,4.0,48,15,49280673,49338760,-1,ohno,0.083333,N,15_48682326_G_C_b37,ENSG00000138593,1,0.0,0


In [105]:
# Convert each row's tissue counts into proportions by dividing by the row's
# expressedTissues value (one proportion column per count column).
eQTLsAndNumTissuesAffectedOhnolog['propBonferroniCorrected'] = (
    eQTLsAndNumTissuesAffectedOhnolog['countBonferroniCorrected']
    .div(eQTLsAndNumTissuesAffectedOhnolog['expressedTissues'])
)
eQTLsAndNumTissuesAffectedOhnolog['propMetasoft'] = (
    eQTLsAndNumTissuesAffectedOhnolog['countMetasoft']
    .div(eQTLsAndNumTissuesAffectedOhnolog['expressedTissues'])
)

In [108]:
# Summary statistics of propBonferroniCorrected for each value of "type",
# over all rows (zero proportions included).
(
    eQTLsAndNumTissuesAffectedOhnolog
    .groupby('type')[['propBonferroniCorrected']]
    .describe()
)

Unnamed: 0_level_0,propBonferroniCorrected,propBonferroniCorrected,propBonferroniCorrected,propBonferroniCorrected,propBonferroniCorrected,propBonferroniCorrected,propBonferroniCorrected,propBonferroniCorrected
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
SSD,1894995.0,0.075062,0.159337,0.0,0.0,0.020833,0.0625,1.0
ohno,1701478.0,0.044665,0.096153,0.0,0.0,0.020833,0.041667,1.0
singleton,1560098.0,0.072267,0.146831,0.0,0.0,0.020833,0.0625,1.0


In [107]:
# Same summary as for all rows, restricted to rows whose
# propBonferroniCorrected is strictly positive.
(
    eQTLsAndNumTissuesAffectedOhnolog
    .loc[eQTLsAndNumTissuesAffectedOhnolog['propBonferroniCorrected'].gt(0)]
    .groupby('type')[['propBonferroniCorrected']]
    .describe()
)

Unnamed: 0_level_0,propBonferroniCorrected,propBonferroniCorrected,propBonferroniCorrected,propBonferroniCorrected,propBonferroniCorrected,propBonferroniCorrected,propBonferroniCorrected,propBonferroniCorrected
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
SSD,1127919.0,0.12611,0.190306,0.020833,0.020833,0.041667,0.125,1.0
ohno,969051.0,0.078424,0.116558,0.020833,0.020833,0.041667,0.083333,1.0
singleton,925297.0,0.121846,0.174095,0.020833,0.020833,0.041667,0.136364,1.0


In [109]:
# Summary statistics of propMetasoft for each value of "type",
# over all rows (zero proportions included).
(
    eQTLsAndNumTissuesAffectedOhnolog
    .groupby('type')[['propMetasoft']]
    .describe()
)

Unnamed: 0_level_0,propMetasoft,propMetasoft,propMetasoft,propMetasoft,propMetasoft,propMetasoft,propMetasoft,propMetasoft
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
SSD,1894995.0,0.375559,0.326842,0.0,0.066667,0.276596,0.666667,1.0
ohno,1701478.0,0.258646,0.271987,0.0,0.041667,0.145833,0.416667,1.0
singleton,1560098.0,0.400898,0.322238,0.0,0.083333,0.333333,0.6875,1.0


In [110]:
# Same summary as for all rows, restricted to rows whose propMetasoft is
# strictly positive.
(
    eQTLsAndNumTissuesAffectedOhnolog
    .loc[eQTLsAndNumTissuesAffectedOhnolog['propMetasoft'].gt(0)]
    .groupby('type')[['propMetasoft']]
    .describe()
)

Unnamed: 0_level_0,propMetasoft,propMetasoft,propMetasoft,propMetasoft,propMetasoft,propMetasoft,propMetasoft,propMetasoft
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
SSD,1812158.0,0.392727,0.323986,0.020833,0.083333,0.3125,0.680851,1.0
ohno,1611438.0,0.273098,0.27233,0.020833,0.047619,0.166667,0.4375,1.0
singleton,1502376.0,0.416301,0.318457,0.020833,0.104167,0.358974,0.708333,1.0
