# Ohnologs & eQTLs

### Background

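This notebook compares how often ohnologs and non-ohnologs (SSDs and singletons) are affected by eQTLs, measured as the number and proportion of GTEx V7 tissues in which each gene has an eQTL.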

### Imports

In [1]:
import pandas as pd
import re
import os.path

### Functions

In [2]:
def removeGeneIDVersions(text):
    """Return the first Ensembl gene ID found in ``text``.

    The ID is matched as ``ENSG`` followed by one or more digits, so anything
    after the digits (for example a ``.4`` version suffix) is dropped.

    Parameters
    ----------
    text : str
        String containing at least one ``ENSG<digits>`` identifier.

    Returns
    -------
    str
        The first ``ENSG<digits>`` substring of ``text``.

    Raises
    ------
    IndexError
        If ``text`` contains no ``ENSG<digits>`` substring. The exception type
        is the one the previous ``findall(...)[0]`` form raised.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape
    # sequence and triggers a warning on modern Python versions.
    match = re.search(r'ENSG\d+', text)
    if match is None:
        raise IndexError('no Ensembl gene ID (ENSG<digits>) found in %r' % (text,))
    return match.group(0)

### Input/Output files

---

### eQTL affected tissues per gene
[Back to top](#Outline)

In [3]:
# Per-gene table of genes tested for eQTLs; the preview below shows the
# columns 'expressedTissues' and 'Ensembl Gene ID'.
genesTestedForeQTLsWithNumberOfTissues = pd.read_csv(
    '../outputFiles/GTExV7/genesTestedWithNumberOfTissues.csv'
)
genesTestedForeQTLsWithNumberOfTissues.head(5)

Unnamed: 0,expressedTissues,Ensembl Gene ID
0,48,ENSG00000272186
1,48,ENSG00000117748
2,48,ENSG00000130856
3,48,ENSG00000169446
4,48,ENSG00000013573


In [4]:
# Number of rows (genes) in the tested-genes table before any filtering.
genesTestedForeQTLsWithNumberOfTissues.shape[0]

38187

In [5]:
# Same layout as the per-tissue table, but read from the "merged tissues"
# file (counts in the preview top out at 26 rather than 48).
genesTestedForeQTLsWithNumberOfMergedTissues = pd.read_csv(
    '../outputFiles/GTExV7/genesTestedWithNumberOfMergedTissues.csv'
)
genesTestedForeQTLsWithNumberOfMergedTissues.head(5)

Unnamed: 0,expressedTissues,Ensembl Gene ID
0,26,ENSG00000170604
1,26,ENSG00000178458
2,26,ENSG00000141425
3,26,ENSG00000117115
4,26,ENSG00000105204


In [6]:
# Tab-separated gene list with coordinates (ID, chromosome, start, end, strand).
# NOTE(review): the file name suggests Ensembl v75 protein-coding genes on
# chromosomes 1-Y -- confirm against the dataset's provenance.
PCGenes = pd.read_csv(
    '../datasets/geneLists/Ensembl/EnsV75ProteinCodingGenes1-Y.txt',
    sep='\t',
)
PCGenes.head(5)

Unnamed: 0,Ensembl Gene ID,Chromosome Name,Gene Start (bp),Gene End (bp),Strand
0,ENSG00000215405,15,20737094,20747114,-1
1,ENSG00000268343,15,21004687,21005367,1
2,ENSG00000230031,15,21040701,21071643,-1
3,ENSG00000138593,15,49280673,49338760,-1
4,ENSG00000268531,15,22011370,22012050,1


In [9]:
# Number of rows (genes) in the PCGenes list.
PCGenes.shape[0]

20314

In [7]:
# Inner-join both tested-gene tables with PCGenes on the gene ID: only genes
# present in PCGenes are kept, and each row gains the coordinate columns.
genesTestedForeQTLsWithNumberOfTissues = genesTestedForeQTLsWithNumberOfTissues.merge(
    PCGenes,
    on="Ensembl Gene ID",
)
genesTestedForeQTLsWithNumberOfMergedTissues = genesTestedForeQTLsWithNumberOfMergedTissues.merge(
    PCGenes,
    on="Ensembl Gene ID",
)

In [8]:
# Number of tested genes remaining after the join with PCGenes.
genesTestedForeQTLsWithNumberOfTissues.shape[0]

19067

##### Bonferroni-corrected

In [16]:
# Per-gene count of tissues with a Bonferroni-significant eQTL
# ('affectedTissues'); the first CSV column is used as the row index.
genesWitheQTLTissueCountBonferroni = pd.read_csv(
    '../outputFiles/GTExV7/genesWithNumberOfBonferroniAffectedTissues.csv',
    index_col=0,
)
genesWitheQTLTissueCountBonferroni.head(5)

Unnamed: 0,Ensembl Gene ID,affectedTissues
0,ENSG00000138593,4
1,ENSG00000166351,1
2,ENSG00000168675,1
3,ENSG00000188992,13
4,ENSG00000185272,8


In [17]:
# Right join: keep every tested gene, including those absent from the
# Bonferroni table (their 'affectedTissues' is NaN after this step).
genesWitheQTLTissueCountBonferroni = genesWitheQTLTissueCountBonferroni.merge(
    genesTestedForeQTLsWithNumberOfTissues,
    on='Ensembl Gene ID',
    how='right',
)
genesWitheQTLTissueCountBonferroni

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand
0,ENSG00000138593,4.0,48,15,49280673,49338760,-1
1,ENSG00000166351,1.0,1,21,14982498,15013906,1
2,ENSG00000168675,1.0,47,18,13217497,13652754,1
3,ENSG00000188992,13.0,30,21,15481134,15583166,-1
4,ENSG00000185272,8.0,48,21,15588451,15600693,1
5,ENSG00000182974,1.0,1,15,22368478,22369561,1
6,ENSG00000155304,1.0,48,21,15743436,15755805,-1
7,ENSG00000153575,8.0,48,15,22833395,22873892,1
8,ENSG00000180530,2.0,48,21,16333556,16437321,-1
9,ENSG00000137824,9.0,48,15,41028082,41048049,-1


In [18]:
# Tested genes that were absent from the Bonferroni table have NaN here after
# the right merge; count them as having 0 affected tissues.
# The result is assigned back to the column instead of calling
# `.fillna(..., inplace=True)` on the selected Series: that chained in-place
# form warns on pandas 2.x and does not update the frame under Copy-on-Write.
genesWitheQTLTissueCountBonferroni['affectedTissues'] = (
    genesWitheQTLTissueCountBonferroni['affectedTissues'].fillna(0)
)

##### Ohnologs

In [20]:
# Gene list used below to label genes as 'ohno'; the preview shows a single
# 'Ensembl Gene ID' column.
ohnologsRelaxed = pd.read_csv(
    '../datasets/geneLists/Singh and Isambert/hsapiens.Pairs.Relaxed.2R.Ens75.1-Y.txt'
)
ohnologsRelaxed.head(5)

Unnamed: 0,Ensembl Gene ID
0,ENSG00000164236
1,ENSG00000147465
2,ENSG00000095464
3,ENSG00000078804
4,ENSG00000155744


In [21]:
# Gene list used below to label genes as 'SSD'; the preview shows a single
# 'Ensembl Gene ID' column.
SSDsRelaxedOhnos = pd.read_csv(
    '../datasets/geneLists/SSDsRelaxedOhnos.txt'
)
SSDsRelaxedOhnos.head(5)

Unnamed: 0,Ensembl Gene ID
0,ENSG00000215405
1,ENSG00000268343
2,ENSG00000230031
3,ENSG00000268531
4,ENSG00000233917


In [22]:
# Gene list used below to label genes as 'singleton'; the preview shows a
# single 'Ensembl Gene ID' column.
singletons = pd.read_csv(
    '../datasets/geneLists/singletons.txt'
)
singletons.head(5)

Unnamed: 0,Ensembl Gene ID
0,ENSG00000269182
1,ENSG00000155304
2,ENSG00000243440
3,ENSG00000180530
4,ENSG00000259458


In [23]:
# Tag every gene with its class. Labels are applied in the order
# ohno -> SSD -> singleton, so a gene present in several lists ends up with
# the last matching label; genes in none of the lists keep the empty string.
genesWitheQTLTissueCountBonferroni.loc[:, 'type'] = ''
bonferroniGeneIDs = genesWitheQTLTissueCountBonferroni['Ensembl Gene ID']
for typeLabel, typedGeneList in (
    ('ohno', ohnologsRelaxed),
    ('SSD', SSDsRelaxedOhnos),
    ('singleton', singletons),
):
    inTypedList = bonferroniGeneIDs.isin(typedGeneList['Ensembl Gene ID'])
    genesWitheQTLTissueCountBonferroni.loc[inTypedList, 'type'] = typeLabel

# Summary statistics of every numeric column, per gene class.
genesWitheQTLTissueCountBonferroni.groupby('type').describe()

Unnamed: 0_level_0,Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene Start (bp),Gene Start (bp),...,affectedTissues,affectedTissues,expressedTissues,expressedTissues,expressedTissues,expressedTissues,expressedTissues,expressedTissues,expressedTissues,expressedTissues
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
SSD,6777.0,73618750.0,56271810.0,70008.0,30894236.0,56667898.0,109584800.0,249214145.0,6777.0,73564900.0,...,11.0,48.0,6777.0,39.105947,15.847822,1.0,40.0,48.0,48.0,48.0
ohno,6550.0,75975530.0,55839250.0,31427.0,32878425.75,61947334.0,113484100.0,246670614.0,6550.0,75874940.0,...,9.0,48.0,6550.0,43.52229,10.59156,1.0,46.0,48.0,48.0,48.0
singleton,5740.0,73859080.0,55558430.0,69452.0,31880189.75,58760361.0,110432800.0,249143716.0,5740.0,73811760.0,...,12.0,48.0,5740.0,41.820383,13.72123,1.0,47.0,48.0,48.0,48.0


In [25]:
# Quick look at the first rows now that the 'type' column is present.
genesWitheQTLTissueCountBonferroni.head(5)

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,type
0,ENSG00000138593,4.0,48,15,49280673,49338760,-1,ohno
1,ENSG00000166351,1.0,1,21,14982498,15013906,1,SSD
2,ENSG00000168675,1.0,47,18,13217497,13652754,1,SSD
3,ENSG00000188992,13.0,30,21,15481134,15583166,-1,SSD
4,ENSG00000185272,8.0,48,21,15588451,15600693,1,ohno


In [26]:
# Fraction of the tissues in which a gene is expressed that also carry an
# eQTL for it: affectedTissues / expressedTissues, element-wise per gene.
genesWitheQTLTissueCountBonferroni['propOfExpressedAffectedByeQTL'] = (
    genesWitheQTLTissueCountBonferroni['affectedTissues']
    .div(genesWitheQTLTissueCountBonferroni['expressedTissues'])
)

In [28]:
# Per-class summary statistics, now including the proportion column.
genesWitheQTLTissueCountBonferroni.groupby(by='type').describe()

Unnamed: 0_level_0,Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene Start (bp),Gene Start (bp),...,expressedTissues,expressedTissues,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
SSD,6777.0,73618750.0,56271810.0,70008.0,30894236.0,56667898.0,109584800.0,249214145.0,6777.0,73564900.0,...,48.0,48.0,6777.0,0.204547,0.252103,0.0,0.020833,0.104167,0.291667,1.0
ohno,6550.0,75975530.0,55839250.0,31427.0,32878425.75,61947334.0,113484100.0,246670614.0,6550.0,75874940.0,...,48.0,48.0,6550.0,0.151683,0.177107,0.0,0.030303,0.086957,0.208333,1.0
singleton,5740.0,73859080.0,55558430.0,69452.0,31880189.75,58760361.0,110432800.0,249143716.0,5740.0,73811760.0,...,48.0,48.0,5740.0,0.200093,0.242668,0.0,0.020833,0.104167,0.291667,1.0


In [29]:
# Same per-class summary, restricted to genes with at least one affected tissue.
genesWitheQTLTissueCountBonferroni.loc[
    genesWitheQTLTissueCountBonferroni['affectedTissues'].gt(0)
].groupby('type').describe()

Unnamed: 0_level_0,Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene Start (bp),Gene Start (bp),...,expressedTissues,expressedTissues,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
SSD,5473.0,73912070.0,56294340.0,73545.0,31054296.0,57233335.0,109780800.0,249153343.0,5473.0,73853130.0,...,48.0,48.0,5473.0,0.253282,0.257596,0.020833,0.0625,0.145833,0.354167,1.0
ohno,5604.0,75732040.0,55837580.0,31427.0,32638860.25,61984211.5,113339100.0,246670614.0,5604.0,75624910.0,...,48.0,48.0,5604.0,0.177289,0.179228,0.020833,0.0625,0.12,0.229167,1.0
singleton,4661.0,73720660.0,55896310.0,69452.0,31499709.0,58554431.0,110421600.0,248903150.0,4661.0,73669240.0,...,48.0,48.0,4661.0,0.246414,0.247197,0.020833,0.0625,0.145833,0.354167,1.0


In [30]:
# Persist the annotated table. Note: the file is written tab-separated even
# though its name ends in .csv, so readers must pass sep='\t'.
genesWitheQTLTissueCountBonferroni.to_csv(
    "../outputFiles/genesWitheQTLTissueCountBonferroniAndOhnologStatus.csv",
    sep='\t',
)

##### Metasoft

In [33]:
# Per-gene count of tissues with a Metasoft eQTL ('affectedTissues'); the
# first CSV column is used as the row index.
genesWitheQTLTissueCountMetasoft = pd.read_csv('../outputFiles/GTExV7/genesWithNumberOfMetasoftAffectedTissues.csv', index_col = 0)

# Right join: keep every tested gene, including those absent from the
# Metasoft table (their 'affectedTissues' is NaN after the merge).
genesWitheQTLTissueCountMetasoft = pd.merge(genesWitheQTLTissueCountMetasoft, genesTestedForeQTLsWithNumberOfTissues, on='Ensembl Gene ID', how='right')

# Count genes missing from the Metasoft table as 0 affected tissues.
# The result is assigned back to the column instead of calling
# `.fillna(..., inplace=True)` on the selected Series: that chained in-place
# form warns on pandas 2.x and does not update the frame under Copy-on-Write,
# which would leave NaN in the proportion computed on the next line.
genesWitheQTLTissueCountMetasoft['affectedTissues'] = genesWitheQTLTissueCountMetasoft['affectedTissues'].fillna(0)

# Fraction of expressed tissues that carry an eQTL for the gene.
genesWitheQTLTissueCountMetasoft['propOfExpressedAffectedByeQTL'] = genesWitheQTLTissueCountMetasoft['affectedTissues'] / genesWitheQTLTissueCountMetasoft['expressedTissues']

# Tag every gene with its class. Labels are applied in the order
# ohno -> SSD -> singleton, so a gene present in several lists ends up with
# the last matching label; genes in none of the lists keep the empty string.
genesWitheQTLTissueCountMetasoft.loc[:, 'type'] = ''
genesWitheQTLTissueCountMetasoft.loc[genesWitheQTLTissueCountMetasoft['Ensembl Gene ID'].isin(ohnologsRelaxed['Ensembl Gene ID']) , 'type'] = 'ohno'
genesWitheQTLTissueCountMetasoft.loc[genesWitheQTLTissueCountMetasoft['Ensembl Gene ID'].isin(SSDsRelaxedOhnos['Ensembl Gene ID']) , 'type'] = 'SSD'
genesWitheQTLTissueCountMetasoft.loc[genesWitheQTLTissueCountMetasoft['Ensembl Gene ID'].isin(singletons['Ensembl Gene ID']) , 'type'] = 'singleton'
genesWitheQTLTissueCountMetasoft

Unnamed: 0,Ensembl Gene ID,affectedTissues,expressedTissues,Chromosome Name,Gene Start (bp),Gene End (bp),Strand,propOfExpressedAffectedByeQTL,type
0,ENSG00000138593,24.0,48,15,49280673,49338760,-1,0.500000,ohno
1,ENSG00000166157,1.0,3,21,10906201,11029719,-1,0.333333,SSD
2,ENSG00000168675,10.0,47,18,13217497,13652754,1,0.212766,SSD
3,ENSG00000188992,30.0,30,21,15481134,15583166,-1,1.000000,SSD
4,ENSG00000185272,48.0,48,21,15588451,15600693,1,1.000000,ohno
5,ENSG00000155304,23.0,48,21,15743436,15755805,-1,0.479167,singleton
6,ENSG00000197414,1.0,4,15,22736246,22746002,1,0.250000,SSD
7,ENSG00000166200,44.0,48,15,49398268,49447858,-1,0.916667,SSD
8,ENSG00000155307,4.0,48,21,15857549,15955723,-1,0.083333,SSD
9,ENSG00000243440,2.0,7,21,15964251,16031142,-1,0.285714,singleton


In [34]:
# Per-class summary statistics of every numeric column (Metasoft calls).
genesWitheQTLTissueCountMetasoft.groupby(by='type').describe()

Unnamed: 0_level_0,Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene Start (bp),Gene Start (bp),...,expressedTissues,expressedTissues,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
SSD,6777.0,73618750.0,56271810.0,70008.0,30894236.0,56667898.0,109584800.0,249214145.0,6777.0,73564900.0,...,48.0,48.0,6777.0,0.64626,0.373773,0.0,0.296296,0.8125,1.0,1.0
ohno,6550.0,75975530.0,55839250.0,31427.0,32878425.75,61947334.0,113484100.0,246670614.0,6550.0,75874940.0,...,48.0,48.0,6550.0,0.627873,0.342538,0.0,0.3125,0.708333,0.976744,1.0
singleton,5740.0,73859080.0,55558430.0,69452.0,31880189.75,58760361.0,110432800.0,249143716.0,5740.0,73811760.0,...,48.0,48.0,5740.0,0.683658,0.367859,0.0,0.354167,0.888889,1.0,1.0


In [35]:
# Same per-class summary, restricted to genes with at least one affected tissue.
genesWitheQTLTissueCountMetasoft.loc[
    genesWitheQTLTissueCountMetasoft['affectedTissues'].gt(0)
].groupby('type').describe()

Unnamed: 0_level_0,Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene End (bp),Gene Start (bp),Gene Start (bp),...,expressedTissues,expressedTissues,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL,propOfExpressedAffectedByeQTL
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
SSD,6161.0,73762250.0,55964150.0,73545.0,31236510.0,57233335.0,108960832.0,249214145.0,6161.0,73704430.0,...,48.0,48.0,6161.0,0.710876,0.328232,0.020833,0.4375,0.875,1.0,1.0
ohno,6351.0,75899640.0,55841680.0,31427.0,32809518.5,61966203.0,113297392.5,246670614.0,6351.0,75797090.0,...,48.0,48.0,6351.0,0.647547,0.32904,0.020833,0.354167,0.729167,0.978723,1.0
singleton,5355.0,73797000.0,55735170.0,69452.0,31744889.0,58523735.0,110232462.0,249143716.0,5355.0,73748050.0,...,48.0,48.0,5355.0,0.73281,0.330189,0.020833,0.5,0.916667,1.0,1.0


In [36]:
# Persist the annotated table. Note: the file is written tab-separated even
# though its name ends in .csv, so readers must pass sep='\t'.
genesWitheQTLTissueCountMetasoft.to_csv(
    "../outputFiles/genesWitheQTLTissueCountMetasoftAndOhnologStatus.csv",
    sep='\t',
)