# P A C K A G E : : : C O M P A R I S O N

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the bayexpress results (all clean replicates included).
# The differential gene expression analysis itself was done in do_DGE.ipynb.
# The first CSV column is discarded and 'genes' is renamed so that all three
# result tables share the same 'locus_name' key.
RALL_bayexpress = (
    pd.read_csv('DGE_results/RALL_bayexpress.csv')
    .iloc[:, 1:]
    .rename(columns={'genes': "locus_name"})
)

# Author's note: no genes are lost to NaNs here, so no
# dropna(subset=['BF_21', 'FC']) step is applied.

# Significance criteria 0 / 1 / 2: (BF_21 > 1) & (abs(FC) > 0 / 1 / 2).
bayexpress_bf_pass = RALL_bayexpress['BF_21'] > 1
bayexpress_abs_fc = RALL_bayexpress['FC'].abs()
for bayexpress_fc_cutoff in (0, 1, 2):
    RALL_bayexpress[f'FC{bayexpress_fc_cutoff}_bayexpress'] = (
        bayexpress_bf_pass & (bayexpress_abs_fc > bayexpress_fc_cutoff)
    )

RALL_bayexpress

Unnamed: 0,locus_name,BF_21,FC,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress
0,15S_rRNA,-3.710645,0.274047,False,False,False
1,21S_rRNA,-1.302642,0.147177,False,False,False
2,HRA1,-5.206545,-0.564787,False,False,False
3,ICR1,76.746125,0.504048,True,False,False
4,LSR1,0.451012,0.120243,False,False,False
...,...,...,...,...,...,...
7121,tY(GUA)J2,-7.312143,0.861839,False,False,False
7122,tY(GUA)M1,-7.406574,0.022303,False,False,False
7123,tY(GUA)M2,-7.727890,0.183767,False,False,False
7124,tY(GUA)O,-7.622253,0.347266,False,False,False


In [3]:
# Load the edgeR results (all clean replicates included).
# The differential gene expression analysis itself was done in do_DGE.ipynb.
# 'genes' is renamed so that all three result tables share the 'locus_name' key.
RALL_edgeR = (
    pd.read_csv('DGE_results/RALL_edgeR.csv')
    .rename(columns={'genes': "locus_name"})
)

# Author's note: no genes are lost to NaNs here, so no
# dropna(subset=['logFC', 'PValue']) step is applied.

# Significance criteria 0 / 1 / 2: (PValue < 0.05) & (abs(logFC) > 0 / 1 / 2).
edgeR_p_pass = RALL_edgeR['PValue'] < 0.05
edgeR_abs_logfc = RALL_edgeR['logFC'].abs()
for edgeR_fc_cutoff in (0, 1, 2):
    RALL_edgeR[f'FC{edgeR_fc_cutoff}_edgeR'] = (
        edgeR_p_pass & (edgeR_abs_logfc > edgeR_fc_cutoff)
    )

RALL_edgeR

Unnamed: 0,locus_name,logFC,logCPM,F,PValue,FDR,FC0_edgeR,FC1_edgeR,FC2_edgeR
0,YIL121W,-2.833997,6.681355,1.393437e+04,2.275960e-99,1.386970e-95,True,True,True
1,YDR033W,-3.765504,8.923851,1.149128e+04,1.110106e-95,3.382494e-92,True,True,True
2,YML123C,-4.669706,9.152594,1.051245e+04,5.593728e-94,1.136273e-90,True,True,True
3,YGR234W,-4.212037,8.462599,9.288989e+03,1.292927e-91,1.969774e-88,True,True,True
4,YOR290C,-7.557956,6.530807,9.462461e+03,1.696224e-91,2.067358e-88,True,True,True
...,...,...,...,...,...,...,...,...,...
6089,YBL052C,0.000363,5.364961,4.246443e-05,9.948153e-01,9.954687e-01,False,False,False
6090,YHR117W,0.000097,6.355627,2.441982e-05,9.960682e-01,9.965588e-01,False,False,False
6091,YMR123W,-0.000171,6.143127,2.150691e-05,9.963102e-01,9.966373e-01,False,False,False
6092,YGR136W,0.000152,6.616432,1.272198e-05,9.971621e-01,9.973258e-01,False,False,False


In [4]:
# Load the DESeq2 results (all clean replicates included).
# The differential gene expression analysis itself was done in do_DGE.ipynb.
# The unnamed first CSV column holds the gene identifiers and becomes
# 'locus_name'. Rows with a NaN log2FoldChange or pvalue are dropped;
# author's note: a lot of genes are lost to NaNs at this step.
RALL_DESeq2 = (
    pd.read_csv('DGE_results/RALL_DESeq2.csv')
    .rename(columns={'Unnamed: 0': "locus_name"})
    .dropna(subset=['log2FoldChange', 'pvalue'])
)

# Significance criteria 0 / 1 / 2:
# (pvalue < 0.05) & (abs(log2FoldChange) > 0 / 1 / 2).
DESeq2_p_pass = RALL_DESeq2['pvalue'] < 0.05
DESeq2_abs_lfc = RALL_DESeq2['log2FoldChange'].abs()
for DESeq2_fc_cutoff in (0, 1, 2):
    RALL_DESeq2[f'FC{DESeq2_fc_cutoff}_DESeq2'] = (
        DESeq2_p_pass & (DESeq2_abs_lfc > DESeq2_fc_cutoff)
    )

RALL_DESeq2

Unnamed: 0,locus_name,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2
0,15S_rRNA,16.994794,-0.136618,0.340703,-0.400988,6.884290e-01,7.313923e-01,False,False,False
1,21S_rRNA,102.643528,-0.243675,0.289561,-0.841533,4.000494e-01,4.548869e-01,False,False,False
2,HRA1,2.550649,-0.851126,0.230477,-3.692895,2.217160e-04,3.651076e-04,True,False,False
3,ICR1,142.961822,0.229845,0.034777,6.609078,3.867198e-11,9.556948e-11,True,False,False
4,LSR1,196.640749,-0.214104,0.162418,-1.318227,1.874278e-01,2.269904e-01,False,False,False
...,...,...,...,...,...,...,...,...,...,...
7120,tY(GUA)J1,0.140228,-0.042322,1.074228,-0.039397,9.685735e-01,9.738411e-01,False,False,False
7121,tY(GUA)J2,0.133175,-0.042322,1.000548,-0.042298,9.662608e-01,9.724756e-01,False,False,False
7122,tY(GUA)M1,0.385050,-0.273708,0.488968,-0.559768,5.756379e-01,6.256143e-01,False,False,False
7123,tY(GUA)M2,0.080417,-0.301508,1.426535,-0.211357,8.326090e-01,8.622913e-01,False,False,False


In [5]:
# Build one comparison table keyed on locus_name. The bayexpress gene list is
# the backbone; each package's flags and statistics are attached with a left
# merge, so genes absent from a package's table get NaN in its columns.
bayexpress_columns = ['locus_name', 'FC0_bayexpress', 'FC1_bayexpress', 'FC2_bayexpress', 'FC', 'BF_21']
edgeR_columns = ['locus_name', 'FC0_edgeR', 'FC1_edgeR', 'FC2_edgeR', 'logFC', 'PValue']
DESeq2_columns = ['locus_name', 'FC0_DESeq2', 'FC1_DESeq2', 'FC2_DESeq2', 'log2FoldChange', 'pvalue']

RALL = (
    pd.DataFrame({'locus_name': RALL_bayexpress.locus_name})
    .merge(RALL_bayexpress[bayexpress_columns], on='locus_name', how='left')
    .merge(RALL_edgeR[edgeR_columns], on='locus_name', how='left')
    .merge(RALL_DESeq2[DESeq2_columns], on='locus_name', how='left')
)

# Optional export (disabled): RALL.to_csv('results_RALL.csv')

RALL

Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
0,15S_rRNA,False,False,False,0.274047,-3.710645,,,,,,False,False,False,-0.136618,6.884290e-01
1,21S_rRNA,False,False,False,0.147177,-1.302642,False,False,False,-0.187417,5.227208e-01,False,False,False,-0.243675,4.000494e-01
2,HRA1,False,False,False,-0.564787,-5.206545,,,,,,True,False,False,-0.851126,2.217160e-04
3,ICR1,True,False,False,0.504048,76.746125,True,False,False,0.238044,1.313037e-09,True,False,False,0.229845,3.867198e-11
4,LSR1,False,False,False,0.120243,0.451012,False,False,False,-0.191748,2.362411e-01,False,False,False,-0.214104,1.874278e-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7121,tY(GUA)J2,False,False,False,0.861839,-7.312143,,,,,,False,False,False,-0.042322,9.662608e-01
7122,tY(GUA)M1,False,False,False,0.022303,-7.406574,,,,,,False,False,False,-0.273708,5.756379e-01
7123,tY(GUA)M2,False,False,False,0.183767,-7.727890,,,,,,False,False,False,-0.301508,8.326090e-01
7124,tY(GUA)O,False,False,False,0.347266,-7.622253,,,,,,False,False,False,-0.239053,8.377597e-01


In [6]:
# Example genes used in the paper, shown in the order listed here.
paper_example_loci = [
    'YAL016C-B',
    'YDL062W',
    'RPR1',
    'YAL063C-A',
    'YGR161W-C',
    'YAL061W',
    '15S_rRNA',
]

RALL.set_index('locus_name').loc[paper_example_loci]

Unnamed: 0_level_0,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
locus_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
YAL016C-B,True,True,False,1.06013,126.045252,True,False,False,0.782743,6.583448e-14,True,False,False,0.775214,1.2666950000000001e-18
YDL062W,False,False,False,-0.86686,-3.484584,,,,,,True,True,False,-1.142114,1.652766e-05
RPR1,True,False,False,-0.973507,102.505781,True,True,False,-1.265288,2.773107e-12,True,True,False,-1.301175,4.3190620000000004e-17
YAL063C-A,True,True,False,1.355379,17.5168,,,,,,True,True,False,1.09709,2.823846e-10
YGR161W-C,True,True,False,1.302575,2034.172394,True,True,False,1.011018,1.648491e-26,True,False,False,0.998423,1.6407359999999998e-50
YAL061W,True,True,False,-1.93853,9735.106916,True,True,True,-2.2243,4.376841e-51,True,True,True,-2.234932,1.9526600000000002e-241
15S_rRNA,False,False,False,0.274047,-3.710645,,,,,,False,False,False,-0.136618,0.688429


In [7]:
# Summary stats for the Venn diagram.
# How many DEGs are there across the three packages combined?
# DEG = 'statistically significant change' & | log2 fold change | > 0
#
# The counts are derived from RALL instead of being typed in by hand, so the
# printed fractions always match the table displayed below.
# A NaN flag (gene missing from a package's results) compares unequal to True
# and therefore counts as "not a DEG" for that package.
fc0_flags = RALL[['FC0_bayexpress', 'FC0_edgeR', 'FC0_DESeq2']].eq(True)
fc0_in_any = fc0_flags.any(axis=1)   # DEG in at least one package
fc0_in_all = fc0_flags.all(axis=1)   # DEG in all three packages

fc0_n_all = int(fc0_in_all.sum())
fc0_n_any = int(fc0_in_any.sum())

# Note: the printed values are fractions (0-1), not percentages.
print('% overlap all 3 of positive hits: ', fc0_n_all / fc0_n_any)
print('% overlap all 3 of total number of genes: ', fc0_n_all / len(RALL))

RALL.loc[fc0_in_any]

% overlap all 3 of positive hits:  0.6791819779517495
% overlap all 3 of total number of genes:  0.5965478529329217


Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
2,HRA1,False,False,False,-0.564787,-5.206545,,,,,,True,False,False,-0.851126,2.217160e-04
3,ICR1,True,False,False,0.504048,76.746125,True,False,False,0.238044,1.313037e-09,True,False,False,0.229845,3.867198e-11
41,RDN5-1,True,False,False,0.316040,13.774601,False,False,False,-0.007909,9.742264e-01,False,False,False,-0.064869,7.857172e-01
46,RDN5-6,True,False,False,0.469612,15.230943,False,False,False,0.144454,6.470872e-01,False,False,False,0.039701,8.973065e-01
49,RNA170,True,False,False,0.785378,15.197425,True,False,False,0.508352,4.155190e-08,True,False,False,0.504294,3.629513e-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7101,tV(AAC)M2,False,False,False,-0.578734,-5.971615,,,,,,True,False,False,-0.866313,2.086246e-03
7103,tV(AAC)O,False,False,False,-0.581105,-5.767066,,,,,,True,False,False,-0.854055,1.762979e-03
7109,tW(CCA)G1,False,False,False,-1.036282,-5.570935,,,,,,True,True,False,-1.306723,1.479294e-03
7112,tW(CCA)K,False,False,False,-0.912602,-4.990121,,,,,,True,True,False,-1.196543,2.352066e-04


In [8]:
# bayexpress results are slightly different ... How different?
# DEG = 'statistically significant change' & | log2 fold change | > 0
#
# Agreement between edgeR and DESeq2 alone: genes flagged by both, divided by
# genes flagged by either. The counts are derived from RALL instead of being
# typed in by hand, so they always match the two tables displayed below.
# A NaN flag (gene missing from a package's results) counts as "not a DEG".
fc0_edgeR_hit = RALL.FC0_edgeR.eq(True)
fc0_DESeq2_hit = RALL.FC0_DESeq2.eq(True)
fc0_either = fc0_edgeR_hit | fc0_DESeq2_hit
fc0_both = fc0_edgeR_hit & fc0_DESeq2_hit

# Note: the printed value is a fraction (0-1), not a percentage.
print('% overlap of DESeq2 and edgeR: ', int(fc0_both.sum()) / int(fc0_either.sum()))

display(RALL.loc[fc0_either])
display(RALL.loc[fc0_both])

% overlap of DESeq2 and edgeR:  0.9285714285714286


Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
2,HRA1,False,False,False,-0.564787,-5.206545,,,,,,True,False,False,-0.851126,2.217160e-04
3,ICR1,True,False,False,0.504048,76.746125,True,False,False,0.238044,1.313037e-09,True,False,False,0.229845,3.867198e-11
49,RNA170,True,False,False,0.785378,15.197425,True,False,False,0.508352,4.155190e-08,True,False,False,0.504294,3.629513e-08
51,RPR1,True,False,False,-0.973507,102.505781,True,True,False,-1.265288,2.773107e-12,True,True,False,-1.301175,4.319062e-17
52,RUF20,True,False,False,0.467352,58.301953,True,False,False,0.199394,3.071342e-02,True,False,False,0.194316,3.406295e-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7101,tV(AAC)M2,False,False,False,-0.578734,-5.971615,,,,,,True,False,False,-0.866313,2.086246e-03
7103,tV(AAC)O,False,False,False,-0.581105,-5.767066,,,,,,True,False,False,-0.854055,1.762979e-03
7109,tW(CCA)G1,False,False,False,-1.036282,-5.570935,,,,,,True,True,False,-1.306723,1.479294e-03
7112,tW(CCA)K,False,False,False,-0.912602,-4.990121,,,,,,True,True,False,-1.196543,2.352066e-04


Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
3,ICR1,True,False,False,0.504048,76.746125,True,False,False,0.238044,1.313037e-09,True,False,False,0.229845,3.867198e-11
49,RNA170,True,False,False,0.785378,15.197425,True,False,False,0.508352,4.155190e-08,True,False,False,0.504294,3.629513e-08
51,RPR1,True,False,False,-0.973507,102.505781,True,True,False,-1.265288,2.773107e-12,True,True,False,-1.301175,4.319062e-17
52,RUF20,True,False,False,0.467352,58.301953,True,False,False,0.199394,3.071342e-02,True,False,False,0.194316,3.406295e-02
59,SRG1,True,False,False,-0.939803,365.733360,True,True,False,-1.200271,1.238917e-27,True,True,False,-1.208724,3.753389e-58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6817,snR8,False,False,False,-0.105108,-4.021802,True,False,False,-0.380661,1.358238e-04,True,False,False,-0.395842,3.519581e-05
6820,snR82,True,False,False,0.821886,153.959142,True,False,False,0.541819,5.804920e-13,True,False,False,0.531967,4.718385e-17
6821,snR83,False,False,False,-0.049198,-5.961317,True,False,False,-0.305331,1.107774e-02,True,False,False,-0.306221,1.178781e-02
6823,snR85,True,False,False,-0.526487,8.618953,True,False,False,-0.794987,1.287467e-10,True,False,False,-0.812711,1.851697e-13


In [9]:
# Summary stats for the Venn diagram.
# How many DEGs are there across the three packages combined?
# DEG = 'statistically significant change' & | log2 fold change | > 2
#
# The counts are derived from RALL instead of being typed in by hand, so the
# printed fractions always match the table displayed below.
# A NaN flag (gene missing from a package's results) compares unequal to True
# and therefore counts as "not a DEG" for that package.
fc2_flags = RALL[['FC2_bayexpress', 'FC2_edgeR', 'FC2_DESeq2']].eq(True)
fc2_in_any = fc2_flags.any(axis=1)   # DEG in at least one package
fc2_in_all = fc2_flags.all(axis=1)   # DEG in all three packages

fc2_n_all = int(fc2_in_all.sum())
fc2_n_any = int(fc2_in_any.sum())

# Note: the printed values are fractions (0-1), not percentages.
print('% overlap all 3 of positive hits: ', fc2_n_all / fc2_n_any)
print('% overlap all 3 of total number of genes: ', fc2_n_all / len(RALL))

RALL.loc[fc2_in_any]

% overlap all 3 of positive hits:  0.6259541984732825
% overlap all 3 of total number of genes:  0.011507156890261015


Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
131,YAL061W,True,True,False,-1.938530,9735.106916,True,True,True,-2.224300,4.376841e-51,True,True,True,-2.234932,1.952660e-241
150,YAR009C,True,True,True,-2.197984,10352.936545,True,True,True,-2.454047,2.848397e-82,True,True,True,-2.462212,0.000000e+00
179,YAR071W,True,True,True,-3.996880,28112.980118,True,True,True,-4.262102,3.306246e-89,True,True,True,-4.271751,0.000000e+00
188,YBL005W-B,True,True,False,-1.925874,9631.636408,True,True,True,-2.187252,1.014400e-77,True,True,True,-2.195321,0.000000e+00
232,YBL044W,True,True,True,3.420120,1190.218507,True,True,True,3.143689,8.052335e-58,True,True,True,3.142939,6.391883e-306
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6696,YPR160W,True,True,True,-2.085180,16099.516352,True,True,True,-2.406787,3.024478e-36,True,True,True,-2.419208,6.538792e-105
6965,tL(CAA)G1,True,True,True,2.407807,11.236280,,,,,,True,True,True,2.172841,8.247782e-11
6982,tL(UAG)L1,True,True,True,-2.228047,12.032051,,,,,,True,True,True,-2.525948,3.236995e-18
6993,tM(CAU)P,True,True,True,2.143409,8.264658,,,,,,True,True,False,1.909268,6.283286e-10


In [10]:
# bayexpress results are slightly different ... How different?
# DEG = 'statistically significant change' & | log2 fold change | > 2
#
# Agreement between edgeR and DESeq2 alone: genes flagged by both, divided by
# genes flagged by either. The counts are derived from RALL instead of being
# typed in by hand, so they always match the table displayed below.
# A NaN flag (gene missing from a package's results) counts as "not a DEG".
fc2_edgeR_hit = RALL.FC2_edgeR.eq(True)
fc2_DESeq2_hit = RALL.FC2_DESeq2.eq(True)
fc2_either = fc2_edgeR_hit | fc2_DESeq2_hit
fc2_both = fc2_edgeR_hit & fc2_DESeq2_hit

# Note: the printed value is a fraction (0-1), not a percentage.
print('% overlap of DESeq2 and edgeR: ', int(fc2_both.sum()) / int(fc2_either.sum()))

RALL.loc[fc2_either]

% overlap of DESeq2 and edgeR:  0.9310344827586207


Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
131,YAL061W,True,True,False,-1.938530,9735.106916,True,True,True,-2.224300,4.376841e-51,True,True,True,-2.234932,1.952660e-241
150,YAR009C,True,True,True,-2.197984,10352.936545,True,True,True,-2.454047,2.848397e-82,True,True,True,-2.462212,0.000000e+00
179,YAR071W,True,True,True,-3.996880,28112.980118,True,True,True,-4.262102,3.306246e-89,True,True,True,-4.271751,0.000000e+00
188,YBL005W-B,True,True,False,-1.925874,9631.636408,True,True,True,-2.187252,1.014400e-77,True,True,True,-2.195321,0.000000e+00
232,YBL044W,True,True,True,3.420120,1190.218507,True,True,True,3.143689,8.052335e-58,True,True,True,3.142939,6.391883e-306
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6692,YPR158W-B,True,True,True,-2.067708,11055.783204,True,True,True,-2.330611,3.909165e-80,True,True,True,-2.338970,0.000000e+00
6696,YPR160W,True,True,True,-2.085180,16099.516352,True,True,True,-2.406787,3.024478e-36,True,True,True,-2.419208,6.538792e-105
6965,tL(CAA)G1,True,True,True,2.407807,11.236280,,,,,,True,True,True,2.172841,8.247782e-11
6982,tL(UAG)L1,True,True,True,-2.228047,12.032051,,,,,,True,True,True,-2.525948,3.236995e-18


In [11]:
# Summary stats for the Venn diagram.
# How many DEGs are there across the three packages combined?
# DEG = 'statistically significant change' & | log2 fold change | > 1
#
# The counts are derived from RALL instead of being typed in by hand, so the
# printed fractions always match the table displayed below.
# A NaN flag (gene missing from a package's results) compares unequal to True
# and therefore counts as "not a DEG" for that package.
fc1_flags = RALL[['FC1_bayexpress', 'FC1_edgeR', 'FC1_DESeq2']].eq(True)
fc1_in_any = fc1_flags.any(axis=1)   # DEG in at least one package
fc1_in_all = fc1_flags.all(axis=1)   # DEG in all three packages

fc1_n_all = int(fc1_in_all.sum())
fc1_n_any = int(fc1_in_any.sum())

# Note: the printed values are fractions (0-1), not percentages.
print('% overlap all 3 of positive hits: ', fc1_n_all / fc1_n_any)
print('% overlap all 3 of total number of genes: ', fc1_n_all / len(RALL))

RALL.loc[fc1_in_any]

% overlap all 3 of positive hits:  0.5022026431718062
% overlap all 3 of total number of genes:  0.04799326410328375


Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
51,RPR1,True,False,False,-0.973507,102.505781,True,True,False,-1.265288,2.773107e-12,True,True,False,-1.301175,4.319062e-17
59,SRG1,True,False,False,-0.939803,365.733360,True,True,False,-1.200271,1.238917e-27,True,True,False,-1.208724,3.753389e-58
76,YAL016C-B,True,True,False,1.060130,126.045252,True,False,False,0.782743,6.583448e-14,True,False,False,0.775214,1.266695e-18
87,YAL025C,True,False,False,-0.958104,1424.577004,True,True,False,-1.190548,5.870594e-22,True,True,False,-1.194373,1.027235e-38
95,YAL031W-A,True,True,False,1.276184,29.160000,,,,,,True,False,False,0.996806,2.009218e-11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7045,tR(UCU)G3,False,False,False,-1.282551,-4.816547,,,,,,True,True,False,-1.500714,1.139737e-03
7069,tS(UGA)I,True,True,False,1.255181,1.923188,,,,,,True,False,False,0.990555,7.163748e-06
7109,tW(CCA)G1,False,False,False,-1.036282,-5.570935,,,,,,True,True,False,-1.306723,1.479294e-03
7112,tW(CCA)K,False,False,False,-0.912602,-4.990121,,,,,,True,True,False,-1.196543,2.352066e-04


In [12]:
# Summary stats for the Venn diagram.
# bayexpress results are slightly different ... How different?
# DEG = 'statistically significant change' & | log2 fold change | > 1
#
# Agreement between edgeR and DESeq2 alone: genes flagged by both, divided by
# genes flagged by either. The counts are derived from RALL instead of being
# typed in by hand, so they always match the table displayed below.
# A NaN flag (gene missing from a package's results) counts as "not a DEG".
fc1_edgeR_hit = RALL.FC1_edgeR.eq(True)
fc1_DESeq2_hit = RALL.FC1_DESeq2.eq(True)
fc1_either = fc1_edgeR_hit | fc1_DESeq2_hit
fc1_both = fc1_edgeR_hit & fc1_DESeq2_hit

# Note: the printed value is a fraction (0-1), not a percentage.
print('% overlap of DESeq2 and edgeR: ', int(fc1_both.sum()) / int(fc1_either.sum()))

RALL.loc[fc1_either]

% overlap of DESeq2 and edgeR:  0.8103130755064457


Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
51,RPR1,True,False,False,-0.973507,102.505781,True,True,False,-1.265288,2.773107e-12,True,True,False,-1.301175,4.319062e-17
59,SRG1,True,False,False,-0.939803,365.733360,True,True,False,-1.200271,1.238917e-27,True,True,False,-1.208724,3.753389e-58
87,YAL025C,True,False,False,-0.958104,1424.577004,True,True,False,-1.190548,5.870594e-22,True,True,False,-1.194373,1.027235e-38
106,YAL038W,True,False,False,-0.960674,40720.852130,True,True,False,-1.233198,1.016964e-31,True,True,False,-1.242215,4.976341e-74
113,YAL044C,True,False,False,-0.796761,1603.445666,True,True,False,-1.080060,4.673820e-26,True,True,False,-1.090641,1.353446e-50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7033,tR(ACG)J,False,False,False,-1.460089,-5.597593,,,,,,True,True,False,-1.363469,5.493450e-03
7045,tR(UCU)G3,False,False,False,-1.282551,-4.816547,,,,,,True,True,False,-1.500714,1.139737e-03
7109,tW(CCA)G1,False,False,False,-1.036282,-5.570935,,,,,,True,True,False,-1.306723,1.479294e-03
7112,tW(CCA)K,False,False,False,-0.912602,-4.990121,,,,,,True,True,False,-1.196543,2.352066e-04


In [13]:
# Genes called DEG by all three packages.
# DEG = 'statistically significant change' & | log2 fold change | > 1
# A NaN flag (gene missing from a package's results) is not equal to True,
# so such genes are excluded.
fc1_all_three_flags = RALL[['FC1_bayexpress', 'FC1_edgeR', 'FC1_DESeq2']]

RALL[(fc1_all_three_flags == True).all(axis=1)]

Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
131,YAL061W,True,True,False,-1.938530,9735.106916,True,True,True,-2.224300,4.376841e-51,True,True,True,-2.234932,1.952660e-241
140,YAL067C,True,True,False,1.523475,188.078827,True,True,False,1.251188,6.411681e-25,True,True,False,1.249073,2.464336e-46
150,YAR009C,True,True,True,-2.197984,10352.936545,True,True,True,-2.454047,2.848397e-82,True,True,True,-2.462212,0.000000e+00
153,YAR015W,True,True,False,-1.020118,4299.519172,True,True,False,-1.305210,6.886973e-27,True,True,False,-1.315608,6.180143e-55
170,YAR053W,True,True,False,1.682479,69.970701,True,True,False,1.406428,6.738346e-22,True,True,False,1.415487,1.588253e-35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6679,YPR149W,True,True,False,-1.882684,18763.419642,True,True,True,-2.161032,1.577840e-67,True,True,True,-2.170515,0.000000e+00
6689,YPR158C-D,True,True,True,-2.167181,15140.964639,True,True,True,-2.431654,8.425324e-82,True,True,True,-2.439859,0.000000e+00
6692,YPR158W-B,True,True,True,-2.067708,11055.783204,True,True,True,-2.330611,3.909165e-80,True,True,True,-2.338970,0.000000e+00
6696,YPR160W,True,True,True,-2.085180,16099.516352,True,True,True,-2.406787,3.024478e-36,True,True,True,-2.419208,6.538792e-105


In [14]:
# Genes called DEG by both edgeR and DESeq2 (bayexpress is not considered).
# DEG = 'statistically significant change' & | log2 fold change | > 1
deg_edgeR_and_DESeq2 = RALL.FC1_edgeR.eq(True) & RALL.FC1_DESeq2.eq(True)

RALL[deg_edgeR_and_DESeq2]

Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
51,RPR1,True,False,False,-0.973507,102.505781,True,True,False,-1.265288,2.773107e-12,True,True,False,-1.301175,4.319062e-17
59,SRG1,True,False,False,-0.939803,365.733360,True,True,False,-1.200271,1.238917e-27,True,True,False,-1.208724,3.753389e-58
87,YAL025C,True,False,False,-0.958104,1424.577004,True,True,False,-1.190548,5.870594e-22,True,True,False,-1.194373,1.027235e-38
106,YAL038W,True,False,False,-0.960674,40720.852130,True,True,False,-1.233198,1.016964e-31,True,True,False,-1.242215,4.976341e-74
113,YAL044C,True,False,False,-0.796761,1603.445666,True,True,False,-1.080060,4.673820e-26,True,True,False,-1.090641,1.353446e-50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6689,YPR158C-D,True,True,True,-2.167181,15140.964639,True,True,True,-2.431654,8.425324e-82,True,True,True,-2.439859,0.000000e+00
6692,YPR158W-B,True,True,True,-2.067708,11055.783204,True,True,True,-2.330611,3.909165e-80,True,True,True,-2.338970,0.000000e+00
6696,YPR160W,True,True,True,-2.085180,16099.516352,True,True,True,-2.406787,3.024478e-36,True,True,True,-2.419208,6.538792e-105
6704,YPR167C,True,True,False,1.344865,2306.731528,True,True,False,1.055126,1.948567e-27,True,True,False,1.043018,1.424662e-53


In [15]:
# Genes called DEG by both bayexpress and DESeq2 (edgeR is not considered).
# DEG = 'statistically significant change' & | log2 fold change | > 1
deg_bayexpress_and_DESeq2 = RALL.FC1_bayexpress.eq(True) & RALL.FC1_DESeq2.eq(True)

RALL[deg_bayexpress_and_DESeq2]

Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
131,YAL061W,True,True,False,-1.938530,9735.106916,True,True,True,-2.224300,4.376841e-51,True,True,True,-2.234932,1.952660e-241
134,YAL063C-A,True,True,False,1.355379,17.516800,,,,,,True,True,False,1.097090,2.823846e-10
136,YAL064W,True,True,False,1.857325,50.385989,,,,,,True,True,False,1.589857,2.776318e-30
140,YAL067C,True,True,False,1.523475,188.078827,True,True,False,1.251188,6.411681e-25,True,True,False,1.249073,2.464336e-46
150,YAR009C,True,True,True,-2.197984,10352.936545,True,True,True,-2.454047,2.848397e-82,True,True,True,-2.462212,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6965,tL(CAA)G1,True,True,True,2.407807,11.236280,,,,,,True,True,True,2.172841,8.247782e-11
6982,tL(UAG)L1,True,True,True,-2.228047,12.032051,,,,,,True,True,True,-2.525948,3.236995e-18
6993,tM(CAU)P,True,True,True,2.143409,8.264658,,,,,,True,True,False,1.909268,6.283286e-10
7005,tN(GUU)P,True,True,True,-3.710414,42.447841,,,,,,True,True,True,-4.001434,1.326828e-34


In [16]:
# Genes called DEG by both bayexpress and edgeR (DESeq2 is not considered).
# DEG = 'statistically significant change' & | log2 fold change | > 1
deg_bayexpress_and_edgeR = RALL.FC1_bayexpress.eq(True) & RALL.FC1_edgeR.eq(True)

RALL[deg_bayexpress_and_edgeR]

Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
131,YAL061W,True,True,False,-1.938530,9735.106916,True,True,True,-2.224300,4.376841e-51,True,True,True,-2.234932,1.952660e-241
140,YAL067C,True,True,False,1.523475,188.078827,True,True,False,1.251188,6.411681e-25,True,True,False,1.249073,2.464336e-46
150,YAR009C,True,True,True,-2.197984,10352.936545,True,True,True,-2.454047,2.848397e-82,True,True,True,-2.462212,0.000000e+00
153,YAR015W,True,True,False,-1.020118,4299.519172,True,True,False,-1.305210,6.886973e-27,True,True,False,-1.315608,6.180143e-55
170,YAR053W,True,True,False,1.682479,69.970701,True,True,False,1.406428,6.738346e-22,True,True,False,1.415487,1.588253e-35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6679,YPR149W,True,True,False,-1.882684,18763.419642,True,True,True,-2.161032,1.577840e-67,True,True,True,-2.170515,0.000000e+00
6689,YPR158C-D,True,True,True,-2.167181,15140.964639,True,True,True,-2.431654,8.425324e-82,True,True,True,-2.439859,0.000000e+00
6692,YPR158W-B,True,True,True,-2.067708,11055.783204,True,True,True,-2.330611,3.909165e-80,True,True,True,-2.338970,0.000000e+00
6696,YPR160W,True,True,True,-2.085180,16099.516352,True,True,True,-2.406787,3.024478e-36,True,True,True,-2.419208,6.538792e-105


In [17]:
# Load the counts table, indexed by gene, so that the genes on which the
# packages disagree can be inspected directly.
# Q-plots for some of these genes were made in examples_genes.ipynb.
counts_RALL = (
    pd.read_csv('RALL.csv')
    .set_index('genes')
)

In [18]:
# Counts for genes that are positive (FC1 criterion) in bayexpress only.
# "!= True" semantics: a NaN flag (gene missing from a package's results)
# counts as "not positive" for that package.
fc1_only_bayexpress = (
    RALL.FC1_bayexpress.eq(True)
    & RALL.FC1_edgeR.ne(True)
    & RALL.FC1_DESeq2.ne(True)
)

counts_RALL.loc[RALL.loc[fc1_only_bayexpress, 'locus_name'].tolist()]

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,35.1,36.1,37.1,38.1,39.1,40.1,41.1,42.1,43,44
genes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
YAL016C-B,21,33,43,54,15,72,31,23,23,37,...,133,38,72,51,84,83,51,98,104,70
YAL031W-A,5,7,10,7,3,19,7,3,1,9,...,22,9,15,16,11,24,18,14,27,17
YAR035W,193,132,144,221,146,242,156,395,203,180,...,879,453,827,348,480,427,390,410,429,311
YAR068W,119,91,90,136,99,190,122,237,117,100,...,528,266,354,254,356,281,268,251,250,231
YBR018C,27,61,30,58,42,81,38,46,42,54,...,178,93,118,71,110,154,75,87,123,80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YPR075C,930,837,824,1262,939,1547,943,1378,1219,933,...,4019,2160,3061,2067,2780,2178,2029,2192,2214,2026
YPR078C,39,52,63,55,33,60,35,42,45,34,...,171,100,129,65,115,131,95,74,123,75
YPR120C,116,165,141,202,167,301,152,168,154,134,...,685,351,412,282,421,373,379,366,416,331
tK(UUU)O,0,8,2,2,2,9,5,1,2,3,...,18,4,4,3,4,7,3,9,11,8


In [19]:
# Counts for genes that are positive (FC1 criterion) in edgeR only.
# "!= True" semantics: a NaN flag (gene missing from a package's results)
# counts as "not positive" for that package.
fc1_only_edgeR = (
    RALL.FC1_bayexpress.ne(True)
    & RALL.FC1_edgeR.eq(True)
    & RALL.FC1_DESeq2.ne(True)
)

counts_RALL.loc[RALL.loc[fc1_only_edgeR, 'locus_name'].tolist()]

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,35.1,36.1,37.1,38.1,39.1,40.1,41.1,42.1,43,44
genes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1


In [20]:
# Counts for genes that are positive (FC1 criterion) in DESeq2 only.
# "!= True" semantics: a NaN flag (gene missing from a package's results)
# counts as "not positive" for that package.
fc1_only_DESeq2 = (
    RALL.FC1_bayexpress.ne(True)
    & RALL.FC1_edgeR.ne(True)
    & RALL.FC1_DESeq2.eq(True)
)

counts_RALL.loc[RALL.loc[fc1_only_DESeq2, 'locus_name'].tolist()]

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,35.1,36.1,37.1,38.1,39.1,40.1,41.1,42.1,43,44
genes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
YCR108C,0,0,0,0,0,1,1,0,0,2,...,4,3,1,0,1,2,1,0,0,1
YDL062W,0,7,4,1,6,10,4,1,2,3,...,2,2,1,0,1,4,1,2,3,1
YDL185C-A,8,25,12,17,31,33,32,12,20,29,...,15,10,4,1,6,3,5,9,12,9
YDR426C,0,0,0,0,0,3,0,0,0,1,...,2,6,1,0,1,8,2,6,1,1
YER038W-A,0,1,1,1,0,0,0,0,0,0,...,0,1,1,0,1,2,0,2,1,1
YER084W-A,2,0,2,1,2,4,1,0,1,0,...,3,1,5,4,5,0,2,1,1,5
YER137W-A,0,0,0,0,0,1,0,0,0,0,...,1,0,2,1,1,0,2,2,2,1
YFL063W,0,0,0,0,0,1,0,2,0,0,...,1,3,1,0,1,0,0,1,2,1
YHR131W-A,4,5,9,8,7,17,4,10,6,14,...,6,7,6,1,4,3,2,4,6,3
YIL174W,0,1,1,1,1,1,4,1,0,0,...,3,5,2,2,1,5,1,2,1,0


In [21]:
# Counts for genes that are positive (FC1 criterion) in DESeq2 and bayexpress
# but not in edgeR. A NaN edgeR flag (gene missing from the edgeR results)
# counts as "not positive".
fc1_bayexpress_DESeq2_not_edgeR = (
    RALL.FC1_bayexpress.eq(True)
    & RALL.FC1_edgeR.ne(True)
    & RALL.FC1_DESeq2.eq(True)
)

counts_RALL.loc[RALL.loc[fc1_bayexpress_DESeq2_not_edgeR, 'locus_name'].tolist()]

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,35.1,36.1,37.1,38.1,39.1,40.1,41.1,42.1,43,44
genes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
YAL063C-A,1,6,6,9,7,10,0,3,2,1,...,18,4,7,7,16,14,19,11,25,11
YAL064W,1,5,5,9,4,5,2,4,3,4,...,13,11,14,4,19,18,10,20,17,10
YAR035C-A,1,0,2,1,2,4,1,2,0,1,...,6,6,5,4,6,8,8,6,6,4
YBL108W,0,1,4,1,1,4,1,1,2,2,...,7,5,7,7,7,5,6,5,8,6
YBR072C-A,2,1,3,0,3,2,2,4,2,1,...,8,6,4,2,6,13,7,6,8,10
YBR178W,0,2,4,0,4,5,0,0,1,2,...,5,2,1,5,4,4,1,5,0,2
YBR200W-A,3,0,4,0,1,2,1,1,2,1,...,9,2,3,8,5,6,2,5,4,1
YBR219C,0,0,0,0,1,3,0,0,1,1,...,2,1,2,1,0,0,2,0,0,1
YBR296C-A,2,7,7,2,2,5,5,2,4,8,...,25,12,13,6,9,14,9,17,18,6
YDR524C-A,0,3,3,0,1,3,4,0,1,0,...,13,3,8,6,12,13,11,7,10,7


In [22]:
# Counts for genes that are positive (FC1 criterion) in edgeR and bayexpress
# but not in DESeq2. A NaN DESeq2 flag (gene missing from the DESeq2 results)
# counts as "not positive".
fc1_bayexpress_edgeR_not_DESeq2 = (
    RALL.FC1_bayexpress.eq(True)
    & RALL.FC1_edgeR.eq(True)
    & RALL.FC1_DESeq2.ne(True)
)

counts_RALL.loc[RALL.loc[fc1_bayexpress_edgeR_not_DESeq2, 'locus_name'].tolist()]

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,35.1,36.1,37.1,38.1,39.1,40.1,41.1,42.1,43,44
genes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
YGR161W-C,262,317,255,453,250,447,249,517,368,275,...,1466,870,1296,579,1000,1083,748,645,929,594


In [23]:
# Counts for genes that are positive (FC1 criterion) in all three packages.
fc1_positive_in_all = (
    RALL[['FC1_bayexpress', 'FC1_edgeR', 'FC1_DESeq2']].eq(True).all(axis=1)
)

counts_RALL.loc[RALL.loc[fc1_positive_in_all, 'locus_name'].tolist()]

# some summary statistics


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,35.1,36.1,37.1,38.1,39.1,40.1,41.1,42.1,43,44
genes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
YAL061W,2114,1725,1485,3013,1544,2846,1915,2976,2486,1792,...,944,526,805,489,662,575,511,491,557,443
YAL067C,11,23,20,28,29,37,19,23,18,26,...,107,52,60,40,67,95,61,55,61,50
YAR009C,1242,1708,1405,2207,1834,4065,2108,2185,1655,2041,...,789,451,439,359,448,443,379,498,479,369
YAR015W,2542,1805,1652,4246,2038,3186,2365,1911,2441,1606,...,2001,1083,1307,1263,1735,1075,1480,1376,1206,1282
YAR053W,2,7,2,8,11,13,1,12,5,9,...,40,23,20,14,16,40,25,21,31,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YPR149W,2914,4331,3778,5798,3821,6318,3971,5881,4551,3546,...,1911,1084,1630,1072,1312,1403,1118,1105,1274,985
YPR158C-D,1826,2540,1963,3141,2640,5748,3276,3455,2506,2949,...,1301,717,702,547,671,689,575,675,680,464
YPR158W-B,1499,2014,1501,2473,1962,4248,2598,2703,2025,2261,...,1070,528,617,475,511,567,493,606,575,422
YPR160W,3193,1747,3896,4896,1376,3447,2434,4940,2640,2024,...,1380,914,994,626,758,985,562,663,806,481


In [24]:
# Raw counts of genes that none of the three packages calls DEG (|log2 FC| > 1).
# `.ne(True)` also matches NaN, so genes without a call from a package are included.

deg_in_none = (
    RALL.FC1_bayexpress.ne(True)
    & RALL.FC1_edgeR.ne(True)
    & RALL.FC1_DESeq2.ne(True)
)
counts_RALL.loc[RALL.loc[deg_in_none, 'locus_name'].tolist()]

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,35.1,36.1,37.1,38.1,39.1,40.1,41.1,42.1,43,44
genes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15S_rRNA,2,12,31,8,21,11,2,3,7,6,...,7,15,6,1,85,8,34,3,6,3
21S_rRNA,20,76,101,99,128,74,36,32,57,104,...,64,79,67,13,576,52,234,23,44,24
HRA1,3,2,2,2,3,5,4,0,4,4,...,5,5,0,2,0,1,2,2,0,1
ICR1,75,123,107,157,98,245,119,120,119,132,...,286,155,184,164,202,186,148,183,195,145
LSR1,60,163,233,163,193,375,194,84,211,158,...,250,195,153,57,742,227,227,117,224,136
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
tY(GUA)J2,0,1,0,0,0,2,0,0,0,0,...,1,1,0,0,0,0,0,2,1,1
tY(GUA)M1,0,0,1,1,1,1,0,0,1,0,...,1,1,1,0,0,0,0,0,0,1
tY(GUA)M2,0,0,1,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tY(GUA)O,0,0,0,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0


In [25]:
# Genes that only DESeq2 calls DEG (|log2 FC| > 1):
# look at their bayexpress results (BF_21, FC) instead of the raw counts.

DESeq2_only = (
    RALL.FC1_bayexpress.ne(True)
    & RALL.FC1_edgeR.ne(True)
    & RALL.FC1_DESeq2.eq(True)
)
bayexpress_by_locus = RALL_bayexpress.set_index('locus_name')
bayexpress_by_locus.loc[RALL.loc[DESeq2_only, 'locus_name'].tolist()]


Unnamed: 0_level_0,BF_21,FC,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress
locus_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
YCR108C,-4.288472,1.513915,False,False,False
YDL062W,-3.484584,-0.86686,False,False,False
YDL185C-A,17.676173,-0.867276,True,False,False
YDR426C,-2.244519,1.309298,False,False,False
YER038W-A,-4.221007,1.906233,False,False,False
YER084W-A,0.07062,1.45974,False,False,False
YER137W-A,-3.602328,1.639446,False,False,False
YFL063W,-4.415139,1.446801,False,False,False
YHR131W-A,4.11135,-0.861831,True,False,False
YIL174W,-1.406973,1.361913,False,False,False


In [26]:
# Raw counts of genes called DEG (|log2 FC| > 1) by DESeq2 and edgeR but not by bayexpress

edgeR_DESeq2_not_bayexpress = (
    RALL.FC1_bayexpress.ne(True)
    & RALL.FC1_edgeR.eq(True)
    & RALL.FC1_DESeq2.eq(True)
)
counts_RALL.loc[RALL.loc[edgeR_DESeq2_not_bayexpress, 'locus_name'].tolist()]

# oh no, let's check out what's going on here in the bayexpress results

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,35.1,36.1,37.1,38.1,39.1,40.1,41.1,42.1,43,44
genes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
RPR1,31,42,51,93,79,83,80,48,145,51,...,50,44,53,16,99,28,25,15,40,21
SRG1,97,278,301,388,283,566,221,171,226,283,...,199,122,123,64,138,217,129,143,191,148
YAL025C,369,1108,520,664,1302,2505,1007,921,1010,1175,...,861,438,569,318,471,555,454,506,610,445
YAL038W,21080,18869,14649,28551,20208,38816,35125,38974,29292,22233,...,26024,13995,15559,12302,13322,10531,11557,12994,10106,8438
YAL044C,1486,1027,1013,2058,1100,1784,1216,1794,1447,1066,...,1420,799,1034,770,1015,731,803,770,772,725
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YPR009W,318,600,510,762,548,915,504,478,506,492,...,458,221,262,191,326,316,248,278,312,270
YPR112C,237,885,418,538,945,2142,832,665,640,1026,...,673,450,398,279,371,453,396,460,505,399
YPR124W,1324,2491,2145,3285,2306,4274,2244,2273,2158,2155,...,2148,1101,1353,961,1369,1579,1257,1360,1510,1169
YPR145W,6192,4080,3953,7213,3976,8581,6461,7377,5469,4926,...,6778,3332,4141,2820,3552,3190,2738,3619,3285,2902


In [27]:
# Genes called DEG (|log2 FC| > 1) by DESeq2 and edgeR but not by bayexpress:
# inspect their bayexpress results (BF_21, FC) to see why bayexpress disagrees.

missed_by_bayexpress = (
    RALL.FC1_bayexpress.ne(True)
    & RALL.FC1_edgeR.eq(True)
    & RALL.FC1_DESeq2.eq(True)
)
bayexpress_results_by_locus = RALL_bayexpress.set_index('locus_name')
bayexpress_results_by_locus.loc[RALL.loc[missed_by_bayexpress, 'locus_name'].tolist()]

# okay looks like it's a close race with the FC cutoffs

Unnamed: 0_level_0,BF_21,FC,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress
locus_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RPR1,102.505781,-0.973507,True,False,False
SRG1,365.733360,-0.939803,True,False,False
YAL025C,1424.577004,-0.958104,True,False,False
YAL038W,40720.852130,-0.960674,True,False,False
YAL044C,1603.445666,-0.796761,True,False,False
...,...,...,...,...,...
YPR009W,785.409899,-0.954538,True,False,False
YPR112C,779.847491,-0.781090,True,False,False
YPR124W,3522.143470,-0.950772,True,False,False
YPR145W,5839.427360,-0.752430,True,False,False


In [28]:
# Merged per-gene table: bayexpress (FC, BF_21), edgeR (logFC, PValue) and DESeq2 (log2FoldChange, pvalue)
# together with the FC0/FC1/FC2 DEG calls of each package.
RALL

Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
0,15S_rRNA,False,False,False,0.274047,-3.710645,,,,,,False,False,False,-0.136618,6.884290e-01
1,21S_rRNA,False,False,False,0.147177,-1.302642,False,False,False,-0.187417,5.227208e-01,False,False,False,-0.243675,4.000494e-01
2,HRA1,False,False,False,-0.564787,-5.206545,,,,,,True,False,False,-0.851126,2.217160e-04
3,ICR1,True,False,False,0.504048,76.746125,True,False,False,0.238044,1.313037e-09,True,False,False,0.229845,3.867198e-11
4,LSR1,False,False,False,0.120243,0.451012,False,False,False,-0.191748,2.362411e-01,False,False,False,-0.214104,1.874278e-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7121,tY(GUA)J2,False,False,False,0.861839,-7.312143,,,,,,False,False,False,-0.042322,9.662608e-01
7122,tY(GUA)M1,False,False,False,0.022303,-7.406574,,,,,,False,False,False,-0.273708,5.756379e-01
7123,tY(GUA)M2,False,False,False,0.183767,-7.727890,,,,,,False,False,False,-0.301508,8.326090e-01
7124,tY(GUA)O,False,False,False,0.347266,-7.622253,,,,,,False,False,False,-0.239053,8.377597e-01


In [29]:
# Summary statistics of the numeric columns; the differing `count` values show how many genes
# have no edgeR / DESeq2 result (NaN) in the merged table.
RALL.describe()

Unnamed: 0,FC,BF_21,logFC,PValue,log2FoldChange,pvalue
count,7126.0,7126.0,6094.0,6094.0,6885.0,6885.0
mean,0.217628,740.541412,-0.048807,0.0775946,-0.048703,0.1077775
std,0.614573,6238.005763,0.5918,0.1991449,0.605104,0.2348169
min,-7.327772,-8.27372,-7.557956,2.27596e-99,-7.591028,0.0
25%,-0.047986,2.525412,-0.255375,1.614042e-17,-0.282666,7.109288e-23
50%,0.26728,83.331612,0.011541,6.242197e-08,-0.001488,4.029648e-07
75%,0.51195,339.509196,0.234648,0.008863504,0.238861,0.04287686
max,3.999632,341558.582413,3.673085,0.9995016,3.73698,0.9985221


In [30]:
# Genes that only bayexpress calls DEG.
# DEG = 'statistically significant change' & | log2 fold change | > 1
# `.ne(True)` also matches NaN, so genes without an edgeR / DESeq2 call are kept.

bayexpress_only = (
    RALL.FC1_bayexpress.eq(True)
    & RALL.FC1_edgeR.ne(True)
    & RALL.FC1_DESeq2.ne(True)
)
RALL.loc[bayexpress_only]

Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF_21,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
76,YAL016C-B,True,True,False,1.060130,126.045252,True,False,False,0.782743,6.583448e-14,True,False,False,0.775214,1.266695e-18
95,YAL031W-A,True,True,False,1.276184,29.160000,,,,,,True,False,False,0.996806,2.009218e-11
166,YAR035W,True,True,False,1.227418,1011.933095,True,False,False,0.939637,3.036895e-19,True,False,False,0.925055,3.772076e-29
176,YAR068W,True,True,False,1.145139,552.837207,True,False,False,0.875260,2.675109e-20,True,False,False,0.863466,1.079119e-31
331,YBR018C,True,True,False,1.211889,212.256397,True,False,False,0.935319,6.070623e-33,True,False,False,0.931106,6.410996e-76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6601,YPR075C,True,True,False,1.085259,4093.239055,True,False,False,0.808931,6.885686e-31,True,False,False,0.799558,9.334362e-66
6604,YPR078C,True,True,False,1.118861,178.355754,True,False,False,0.835384,9.116307e-21,True,False,False,0.823888,5.688491e-35
6646,YPR120C,True,True,False,1.088142,656.390022,True,False,False,0.818728,7.358504e-41,True,False,False,0.811897,1.954597e-123
6959,tK(UUU)O,True,True,False,1.123969,6.672051,,,,,,True,False,False,0.847818,1.101103e-06


In [31]:
# Number of DEG calls (|log2 FC| > 1) per package; NaN = gene has no call from that package

for fc1_column in ('FC1_bayexpress', 'FC1_edgeR', 'FC1_DESeq2'):
    display(RALL[fc1_column].value_counts(dropna=False))

FC1_bayexpress
False    6593
True      533
Name: count, dtype: int64

FC1_edgeR
False    5653
NaN      1032
True      441
Name: count, dtype: int64

FC1_DESeq2
False    6343
True      542
NaN       241
Name: count, dtype: int64

In [32]:
# What are all the NaNs in DESeq2?
# Select by gene name (as every other lookup in this notebook does) rather than passing a
# boolean list built from RALL: a boolean list is applied positionally and would silently
# pick the wrong rows if RALL and counts_RALL were ever ordered differently.

counts_RALL.loc[RALL.loc[RALL.FC1_DESeq2.isna(), 'locus_name'].tolist()]

# well, fair enough

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,35.1,36.1,37.1,38.1,39.1,40.1,41.1,42.1,43,44
genes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PWR1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Q0010,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Q0017,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Q0032,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Q0080,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
tT(UGU)Q1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tT(XXX)Q2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tV(UAC)Q,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tW(UCA)Q,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
# What are all the NaNs in edgeR?
# Select by gene name (as every other lookup in this notebook does) rather than passing a
# boolean list built from RALL: a boolean list is applied positionally and would silently
# pick the wrong rows if RALL and counts_RALL were ever ordered differently.

counts_RALL.loc[RALL.loc[RALL.FC1_edgeR.isna(), 'locus_name'].tolist()]

# hmm, interesting

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,35.1,36.1,37.1,38.1,39.1,40.1,41.1,42.1,43,44
genes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15S_rRNA,2,12,31,8,21,11,2,3,7,6,...,7,15,6,1,85,8,34,3,6,3
HRA1,3,2,2,2,3,5,4,0,4,4,...,5,5,0,2,0,1,2,2,0,1
PWR1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Q0010,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Q0017,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
tY(GUA)J2,0,1,0,0,0,2,0,0,0,0,...,1,1,0,0,0,0,0,2,1,1
tY(GUA)M1,0,0,1,1,1,1,0,0,1,0,...,1,1,1,0,0,0,0,0,0,1
tY(GUA)M2,0,0,1,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tY(GUA)O,0,0,0,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0


In [34]:
from matplotlib import pyplot as plt
from matplotlib_venn import venn3, venn3_circles

# Venn diagram of the DEG calls at |log2 FC| > 2 (FC2_* columns) of the three packages.
# Coloring for Colorblindness: https://davidmathlogic.com/colorblind/#%23332288-%23117733-%2344AA99-%2388CCEE-%23DDCC77-%23CC6677-%23AA4499-%23882255

plt.figure(figsize=(3, 3), dpi=300)

# one set of gene names per package, built once and used for both the areas and the outlines
deg_sets_FC2 = [
    set(RALL.loc[RALL[f'FC2_{package}'] == True, 'locus_name'])
    for package in ('bayexpress', 'edgeR', 'DESeq2')
]

v = venn3(
    deg_sets_FC2,
    set_labels=('bayexpress', 'edgeR', 'DESeq2'),
    set_colors=("#332288", "#882255", '#DDCC77'),
    alpha=0.6,
)

# linewidth 0.0 keeps the circle outlines invisible
venn3_circles(deg_sets_FC2, linestyle="-", linewidth=0.0)

plt.show()

# exported for paper as FC2.png

ModuleNotFoundError: No module named 'matplotlib_venn'

In [36]:
from matplotlib import pyplot as plt
from matplotlib_venn import venn3, venn3_circles

# Venn diagram of the DEG calls at |log2 FC| > 1 (FC1_* columns) of the three packages.
# Coloring for Colorblindness: https://davidmathlogic.com/colorblind/#%23332288-%23117733-%2344AA99-%2388CCEE-%23DDCC77-%23CC6677-%23AA4499-%23882255

plt.figure(figsize=(3, 3), dpi=300)

# one set of gene names per package, built once and used for both the areas and the outlines
deg_sets_FC1 = [
    set(RALL.loc[RALL[f'FC1_{package}'] == True, 'locus_name'])
    for package in ('bayexpress', 'edgeR', 'DESeq2')
]

v = venn3(
    deg_sets_FC1,
    set_labels=('bayexpress', 'edgeR', 'DESeq2'),
    set_colors=("#332288", "#882255", '#DDCC77'),
    alpha=0.6,
)

# linewidth 0.0 keeps the circle outlines invisible
venn3_circles(deg_sets_FC1, linestyle="-", linewidth=0.0)

plt.show()
# exported for paper as FC1.png

ModuleNotFoundError: No module named 'matplotlib_venn'

In [None]:
from matplotlib import pyplot as plt
from matplotlib_venn import venn3, venn3_circles

# Venn diagram of the DEG calls at |log2 FC| > 0 (FC0_* columns) of the three packages.
# Coloring for Colorblindness: https://davidmathlogic.com/colorblind/#%23332288-%23117733-%2344AA99-%2388CCEE-%23DDCC77-%23CC6677-%23AA4499-%23882255

plt.figure(figsize=(3, 3), dpi=300)

# one set of gene names per package, built once and used for both the areas and the outlines
deg_sets_FC0 = [
    set(RALL.loc[RALL[f'FC0_{package}'] == True, 'locus_name'])
    for package in ('bayexpress', 'edgeR', 'DESeq2')
]

v = venn3(
    deg_sets_FC0,
    set_labels=('bayexpress', 'edgeR', 'DESeq2'),
    set_colors=("#332288", "#882255", '#DDCC77'),
    alpha=0.6,
)

# linewidth 0.0 keeps the circle outlines invisible
venn3_circles(deg_sets_FC0, linestyle="-", linewidth=0.0)

plt.show()
# exported for paper as FC0.png

In [None]:
# Raw counts of genes that only bayexpress calls DEG at the stricter cutoff.
# DEG = 'statistically significant change' & | log2 fold change | > 2

bayexpress_only_FC2 = (
    RALL.FC2_bayexpress.eq(True)
    & RALL.FC2_edgeR.ne(True)
    & RALL.FC2_DESeq2.ne(True)
)
display(counts_RALL.loc[RALL.loc[bayexpress_only_FC2, 'locus_name'].tolist()])


In [None]:
# Raw counts of genes called DEG by DESeq2 and edgeR but not by bayexpress at the stricter cutoff.
# DEG = 'statistically significant change' & | log2 fold change | > 2

edgeR_DESeq2_not_bayexpress_FC2 = (
    RALL.FC2_bayexpress.ne(True)
    & RALL.FC2_edgeR.eq(True)
    & RALL.FC2_DESeq2.eq(True)
)
display(counts_RALL.loc[RALL.loc[edgeR_DESeq2_not_bayexpress_FC2, 'locus_name'].tolist()])


In [None]:
# log2 fold change: bayexpress (x) against DESeq2 (y), with marginal histograms.
# RALL columns: FC = bayexpress, logFC = edgeR, log2FoldChange = DESeq2

fig = plt.figure(figsize=(6, 6), dpi=300)

# 2x2 grid: scatter plot bottom-left, marginal histograms along its top and right edge
gs = fig.add_gridspec(
    2, 2,
    width_ratios=(4, 1), height_ratios=(1, 4),
    left=0.1, right=0.9, bottom=0.1, top=0.9,
    wspace=0.05, hspace=0.05,
)

ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)

ax.grid()
ax_histx.tick_params(axis="x", labelbottom=False)
ax_histy.tick_params(axis="y", labelleft=False)

# same fixed range on both axes
ax.set_xlim(-8, 8)
ax.set_ylim(-8, 8)

# one '+' per gene
ax.scatter(RALL['FC'], RALL['log2FoldChange'], c='#44AA99', s=300, alpha=1, marker='+')

# marginal distributions of the two fold-change estimates
step_style = dict(bins=50, histtype='step', alpha=0.8, linewidth=2)
ax_histx.hist(RALL['FC'], color='#882255', orientation='vertical', **step_style)
ax_histy.hist(RALL['log2FoldChange'], color='#332288', orientation='horizontal', **step_style)

# dashed identity line from (-8, -8) to (8, 8)
ax.plot([-8, 8], [-8, 8], ls='--', c='black')

ax_histx.grid()
ax_histy.grid()

ax.set_xlabel('log2 fold change (Bayesian framework)')
ax.set_ylabel('log2 fold change (DESeq2)')


In [None]:
# Rows of RALL for genes with at least one read in every replicate.
# Matched by gene name instead of by row position, so the result stays correct even if
# RALL and counts_RALL are not in the same row order.
RALL.loc[RALL['locus_name'].isin(counts_RALL.index[(counts_RALL > 0).all(axis=1)])]

In [None]:
# Let's check if the non-diagonal points are n = 0 genes.
# Here we filter out all genes where at least one replicate has 0 reads mapping to the gene.

# RALL columns: FC = bayexpress, logFC = edgeR, log2FoldChange = DESeq2

# Genes with > 0 reads in every replicate, matched to RALL by gene name (not by row position)
# and computed once for the scatter plot and both histograms.
genes_all_nonzero = counts_RALL.index[(counts_RALL > 0).all(axis=1)]
RALL_nonzero = RALL.loc[RALL['locus_name'].isin(genes_all_nonzero)]

fig = plt.figure(figsize=(6, 6), dpi=300)

# 2x2 grid: scatter plot bottom-left, marginal histograms along its top and right edge
gs = fig.add_gridspec(
    2, 2,
    width_ratios=(4, 1), height_ratios=(1, 4),
    left=0.1, right=0.9, bottom=0.1, top=0.9,
    wspace=0.05, hspace=0.05,
)

ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)

ax.grid()
ax_histx.tick_params(axis="x", labelbottom=False)
ax_histy.tick_params(axis="y", labelleft=False)

ax.set_xlim(-8, 8)
ax.set_ylim(-8, 8)

# the scatter plot: one '+' per remaining gene
ax.scatter(RALL_nonzero['FC'], RALL_nonzero['log2FoldChange'], c='#44AA99', s=300, alpha=1, marker='+')

# marginal histograms of the SAME filtered genes
# (previously drawn from the unfiltered RALL, so they did not match the scatter plot)
step_style = dict(bins=50, histtype='step', alpha=0.8, linewidth=2)
ax_histx.hist(RALL_nonzero['FC'], color='#882255', orientation='vertical', **step_style)
ax_histy.hist(RALL_nonzero['log2FoldChange'], color='#332288', orientation='horizontal', **step_style)

# dashed identity line from (-8, -8) to (8, 8)
ax.plot([-8, 8], [-8, 8], ls='--', c='black')

ax_histx.grid()
ax_histy.grid()

ax.set_xlabel('log2 fold change (Bayesian framework)')
ax.set_ylabel('log2 fold change (DESeq2)')

In [None]:
# log2 fold change: bayexpress (x) against edgeR (y), with marginal histograms.
# RALL columns: FC = bayexpress, logFC = edgeR, log2FoldChange = DESeq2

fig = plt.figure(figsize=(6, 6), dpi=300)

# 2x2 grid: scatter plot bottom-left, marginal histograms along its top and right edge
gs = fig.add_gridspec(
    2, 2,
    width_ratios=(4, 1), height_ratios=(1, 4),
    left=0.1, right=0.9, bottom=0.1, top=0.9,
    wspace=0.05, hspace=0.05,
)

ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)

ax.grid()
ax_histx.tick_params(axis="x", labelbottom=False)
ax_histy.tick_params(axis="y", labelleft=False)

# same fixed range on both axes
ax.set_xlim(-8, 8)
ax.set_ylim(-8, 8)

# one '+' per gene
ax.scatter(RALL['FC'], RALL['logFC'], c='#44AA99', s=300, alpha=1, marker='+')

# marginal distributions of the two fold-change estimates
step_style = dict(bins=50, histtype='step', alpha=0.8, linewidth=2)
ax_histx.hist(RALL['FC'], color='#882255', orientation='vertical', **step_style)
ax_histy.hist(RALL['logFC'], color='#117733', orientation='horizontal', **step_style)

# dashed identity line from (-8, -8) to (8, 8)
ax.plot([-8, 8], [-8, 8], ls='--', c='black')

ax_histx.grid()
ax_histy.grid()

ax.set_xlabel('log2 fold change (Bayesian framework)')
ax.set_ylabel('log2 fold change (edgeR)')


In [None]:
# log2 fold change: DESeq2 (x) against edgeR (y), with marginal histograms.
# RALL columns: FC = bayexpress, logFC = edgeR, log2FoldChange = DESeq2

fig = plt.figure(figsize=(6, 6), dpi=300)

# 2x2 grid: scatter plot bottom-left, marginal histograms along its top and right edge
gs = fig.add_gridspec(
    2, 2,
    width_ratios=(4, 1), height_ratios=(1, 4),
    left=0.1, right=0.9, bottom=0.1, top=0.9,
    wspace=0.05, hspace=0.05,
)

ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)

ax.grid()
ax_histx.tick_params(axis="x", labelbottom=False)
ax_histy.tick_params(axis="y", labelleft=False)

# same fixed range on both axes
ax.set_xlim(-8, 8)
ax.set_ylim(-8, 8)

# one '+' per gene
ax.scatter(RALL['log2FoldChange'], RALL['logFC'], c='#44AA99', s=300, alpha=1, marker='+')

# marginal distributions of the two fold-change estimates
step_style = dict(bins=50, histtype='step', alpha=0.8, linewidth=2)
ax_histx.hist(RALL['log2FoldChange'], color='#332288', orientation='vertical', **step_style)
ax_histy.hist(RALL['logFC'], color='#117733', orientation='horizontal', **step_style)

# dashed identity line from (-8, -8) to (8, 8)
ax.plot([-8, 8], [-8, 8], ls='--', c='black')

ax_histx.grid()
ax_histy.grid()

ax.set_xlabel('log2 fold change (DESeq2)')
ax.set_ylabel('log2 fold change (edgeR)')


In [None]:
# Genes whose bayexpress fold change (FC) exceeds the DESeq2 one (log2FoldChange) by more than 0.5:
# first their rows in RALL, then their raw counts.

fc_mismatch = RALL.loc[RALL['FC'].sub(RALL['log2FoldChange']).gt(0.5)]
display(fc_mismatch)

counts_RALL.loc[fc_mismatch['locus_name'].tolist()]

# Hah, okay that's Laplace?

# And no deviation with edgeR is explained as they filter out genes with zeros


In [None]:
# Show the merged bayexpress / edgeR / DESeq2 comparison table again for reference
RALL

In [None]:
# Genes with a small inferred fold change (|FC| < 0.5) but a high Bayes factor (BF_21 > 100)

low_iFC_high_BF = RALL['FC'].abs().lt(0.5) & RALL['BF_21'].gt(100)
RALL.loc[low_iFC_high_BF]


In [None]:
# Genes with a large inferred fold change (|FC| > 2) but a low Bayes factor (BF_21 < 1):
# show the rows, then print just the gene names.

high_iFC_low_BF = RALL.loc[RALL['FC'].abs().gt(2) & RALL['BF_21'].lt(1)]

display(high_iFC_low_BF)

print(high_iFC_low_BF['locus_name'].tolist())


# very very low read numbers for those genes

In [None]:
# P-values (DESeq2) vs. Bayes factors
# RALL columns: pvalue = DESeq2 p-value, BF_21 = bayexpress Bayes factor

fig = plt.figure(figsize=(6, 6), dpi=300)

# 2x2 grid: scatter plot bottom-left, marginal histograms along its top and right edge
gs = fig.add_gridspec(2, 2,  width_ratios=(4, 1), height_ratios=(1, 4), left=0.1, right=0.9, bottom=0.1, top=0.9, wspace=0.05, hspace=0.05)

ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)

ax.grid()
ax_histx.tick_params(axis="x", labelbottom=False)
ax_histy.tick_params(axis="y", labelleft=False)

# clip the Bayes-factor axis; the largest values would otherwise flatten everything else
ax.set_ylim(-8, 100)

# the scatter plot (the Bayes-factor column is called BF_21; RALL has no column `BF`)
ax.scatter(RALL.pvalue, RALL.BF_21, c='black', s=30,
alpha=0.4, marker='x')

# marginal histograms
ax_histx.hist(RALL.pvalue, 
              
              bins=50, histtype='step', color='#882255', alpha=0.8, linewidth=2, orientation='vertical')

ax_histy.hist(RALL.BF_21, 
              
              bins=50, histtype='step', color='#117733', alpha=0.8, linewidth=2, orientation='horizontal')

ax_histx.grid()
ax_histy.grid()

# x shows the DESeq2 p-value (`pvalue`), not the edgeR one
ax.set_xlabel('p-value (DESeq2)')
ax.set_ylabel('Bayes factor')


In [None]:
# P-values (edgeR) vs. Bayes factors
# RALL columns: PValue = edgeR p-value, BF_21 = bayexpress Bayes factor

fig = plt.figure(figsize=(6, 6), dpi=300)

# 2x2 grid: scatter plot bottom-left, marginal histograms along its top and right edge
gs = fig.add_gridspec(2, 2,  width_ratios=(4, 1), height_ratios=(1, 4), left=0.1, right=0.9, bottom=0.1, top=0.9, wspace=0.05, hspace=0.05)

ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)

ax.grid()
ax_histx.tick_params(axis="x", labelbottom=False)
ax_histy.tick_params(axis="y", labelleft=False)

# clip the Bayes-factor axis; the largest values would otherwise flatten everything else
ax.set_ylim(-8, 100)

# the scatter plot (the Bayes-factor column is called BF_21; RALL has no column `BF`)
ax.scatter(RALL.PValue, RALL.BF_21, c='black', s=30,
alpha=0.4, marker='x')

# marginal histograms
ax_histx.hist(RALL.PValue, 
              
              bins=50, histtype='step', color='#882255', alpha=0.8, linewidth=2, orientation='vertical')

ax_histy.hist(RALL.BF_21, 
              
              bins=50, histtype='step', color='#117733', alpha=0.8, linewidth=2, orientation='horizontal')

ax_histx.grid()
ax_histy.grid()

ax.set_xlabel('p-value (edgeR)')
ax.set_ylabel('Bayes factor')


In [None]:
# P-values (edgeR) vs. p-values (DESeq2)
# RALL columns: PValue = edgeR p-value, pvalue = DESeq2 p-value

fig = plt.figure(figsize=(6, 6), dpi=300)

# 2x2 grid: scatter plot bottom-left, marginal histograms along its top and right edge
gs = fig.add_gridspec(2, 2,  width_ratios=(4, 1), height_ratios=(1, 4), left=0.1, right=0.9, bottom=0.1, top=0.9, wspace=0.05, hspace=0.05)

ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)

ax.grid()
ax_histx.tick_params(axis="x", labelbottom=False)
ax_histy.tick_params(axis="y", labelleft=False)

# the scatter plot: edgeR on x, DESeq2 on y
ax.scatter(RALL.PValue, RALL.pvalue, c='black', s=30,
alpha=0.4, marker='x')

# marginal histograms
ax_histx.hist(RALL.PValue, 
              
              bins=50, histtype='step', color='#882255', alpha=0.8, linewidth=2, orientation='vertical')

ax_histy.hist(RALL.pvalue, 
              
              bins=50, histtype='step', color='#117733', alpha=0.8, linewidth=2, orientation='horizontal')

ax_histx.grid()
ax_histy.grid()

# labels follow the plotted columns (they were swapped before)
ax.set_xlabel('p-value (edgeR)')
ax.set_ylabel('p-value (DESeq2)')


In [None]:
# Genes with a very high Bayes factor (BF_21 > 100) but a high DESeq2 p-value (> 0.7):
# show the rows, then print just the gene names.

high_BF_high_pvalue = RALL.loc[RALL['pvalue'].abs().gt(0.7) & RALL['BF_21'].gt(100)]

display(high_BF_high_pvalue)

print(high_BF_high_pvalue['locus_name'].tolist())


## RANK BASED COMPARISON

In [None]:
def calc_rbo(ranked_list_1, ranked_list_2, p):
    """Extrapolated rank-biased overlap (RBO) of two ranked lists.

    Parameters
    ----------
    ranked_list_1, ranked_list_2 : sequence of hashable items
        The two rankings, best item first. They may differ in length.
    p : float
        Weighting parameter, strictly between 0 and 1. The agreement at depth d
        is weighted by p**d, so a small p concentrates the weight on the top ranks.

    Returns
    -------
    float
        ((1-p)/p) * (term_1 + term_2) + term_3, with the three terms as
        commented below; identical rankings give 1 and disjoint rankings give 0.
    str
        For invalid input no exception is raised; an error message is returned
        instead (p outside (0, 1), or an empty list). Callers that collect the
        results should check for this.
    """
    # Explicit checks rather than `assert`, which is skipped under `python -O`.
    if not 0 < p < 1:
        return f"p = {p}, p should be (0,1)."

    if not (len(ranked_list_2) > 0 and len(ranked_list_1) > 0):
        return f"Error: Empty list supplied [{len(ranked_list_1)}, {len(ranked_list_2)}]"

    # determine the corresponding shorter and longer lists (on a tie, list 2 is "short")
    short_list, long_list = (ranked_list_1, ranked_list_2) if len(ranked_list_1) < len(ranked_list_2) else (ranked_list_2, ranked_list_1)
    short_list_len, long_list_len = len(short_list), len(long_list) # s, l

    # X_d = number of items shared by the two prefixes of depth d, for d = 1..l.
    # The overlap is updated incrementally as each prefix grows, instead of
    # re-intersecting the two prefix sets at every depth (quadratic for long lists).
    seen_short = set()
    seen_long = set()
    overlap = 0
    intersections = {}  # {d: X_d}
    for depth in range(1, long_list_len + 1):
        item = long_list[depth - 1]
        if item not in seen_long:
            seen_long.add(item)
            if item in seen_short:
                overlap += 1
        if depth <= short_list_len:
            item = short_list[depth - 1]
            if item not in seen_short:
                seen_short.add(item)
                if item in seen_long:
                    overlap += 1
        intersections[depth] = float(overlap)

    # term 1
    # \sum_{d=1}^{l} (X_{d}/d)*p^{d}
    term_1 = sum(intersections[d]/d*pow(p, d) for d in range(1, long_list_len + 1))

    # term 2
    # \sum_{d=s+1}^{l} ((X_{s}(d-s)/sd)*p^{d})
    Xs = intersections[short_list_len]
    Xl = intersections[long_list_len]
    term_2 = sum(((Xs * (d - short_list_len)) / (short_list_len*d) * pow(p, d)) for d in range(short_list_len + 1, long_list_len + 1))

    # term 3
    # [(X_{l} - X_{s}) / l + X_{s} / s] * p^{l}
    term_3 = ((Xl - Xs) / (long_list_len) + (Xs) / (short_list_len) ) * pow(p, long_list_len)

    # RBO = ((1-p)/p)*(term_1 +term_2) + term_3
    return ((1-p)/p)*(term_1 +term_2) + term_3

In [None]:
# Absolute log2 fold change per package (bayexpress, edgeR, DESeq2), added to RALL as new columns
for abs_column, fc_column in (
    ('abs_iFC', 'FC'),
    ('abs_FC_edgeR', 'logFC'),
    ('abs_FC_DESeq2', 'log2FoldChange'),
):
    RALL[abs_column] = np.abs(RALL[fc_column].to_numpy())


In [None]:
# Labels of the 15 pairwise ranking comparisons, written as (left, right) pairs:
# every unordered pair of the six ranking criteria
# iFC, BF_21, FC_edgeR, p_edgeR, FC_DESeq2 and p_DESeq2.
RBO_index_pairs = [
    ('BF_21', 'iFC'),
    ('FC_edgeR', 'iFC'),
    ('p_edgeR', 'iFC'),
    ('FC_DESeq2', 'iFC'),
    ('p_DESeq2', 'iFC'),
    ('FC_edgeR', 'BF_21'),
    ('p_edgeR', 'BF_21'),
    ('FC_DESeq2', 'BF_21'),
    ('p_DESeq2', 'BF_21'),
    ('p_edgeR', 'FC_edgeR'),
    ('FC_DESeq2', 'FC_edgeR'),
    ('p_DESeq2', 'FC_edgeR'),
    ('FC_DESeq2', 'p_edgeR'),
    ('p_DESeq2', 'p_edgeR'),
    ('p_DESeq2', 'FC_DESeq2'),
]

# parallel label lists, in the same order as the pairs above
RBO_index_L = [left for left, _ in RBO_index_pairs]
RBO_index_R = [right for _, right in RBO_index_pairs]

In [None]:
p = 0.1

# Ranking criterion label (as used in RBO_index_L / RBO_index_R) -> (RALL column, ascending?).
# Fold changes and Bayes factors rank largest first, p-values smallest first.
# The Bayes-factor column is 'BF_21'; sorting by 'BF' raises a KeyError.
ranking_columns = {
    'iFC': ('abs_iFC', False),
    'BF_21': ('BF_21', False),
    'FC_edgeR': ('abs_FC_edgeR', False),
    'p_edgeR': ('PValue', True),
    'FC_DESeq2': ('abs_FC_DESeq2', False),
    'p_DESeq2': ('pvalue', True),
}

# one ranked gene list per criterion (sorted once, reused for every comparison)
ranked_genes = {
    label: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
    for label, (column, ascending) in ranking_columns.items()
}

# RBO of every (left, right) pair, in the order given by RBO_index_L / RBO_index_R
RBO_1 = [calc_rbo(ranked_genes[left], ranked_genes[right], p)
         for left, right in zip(RBO_index_L, RBO_index_R)]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_1, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.2

# Ranking criterion label (as used in RBO_index_L / RBO_index_R) -> (RALL column, ascending?).
# Fold changes and Bayes factors rank largest first, p-values smallest first.
ranking_columns = {
    'iFC': ('abs_iFC', False),
    'BF_21': ('BF_21', False),
    'FC_edgeR': ('abs_FC_edgeR', False),
    'p_edgeR': ('PValue', True),
    'FC_DESeq2': ('abs_FC_DESeq2', False),
    'p_DESeq2': ('pvalue', True),
}

# one ranked gene list per criterion (sorted once instead of once per comparison)
ranked_genes = {
    label: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
    for label, (column, ascending) in ranking_columns.items()
}

# RBO of every (left, right) pair, in the order given by RBO_index_L / RBO_index_R
RBO_2 = [calc_rbo(ranked_genes[left], ranked_genes[right], p)
         for left, right in zip(RBO_index_L, RBO_index_R)]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_2, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.3

# Ranking criteria mapped to the `ascending` flag used when sorting RALL:
# 'PValue' and 'pvalue' are ranked ascending, every other column descending.
# The insertion order of this dict determines the order of the comparisons below.
rank_ascending = {
    'abs_iFC': False,
    'BF_21': False,
    'abs_FC_edgeR': False,
    'PValue': True,
    'abs_FC_DESeq2': False,
    'pvalue': True,
}

# Sort RALL once per criterion instead of once per comparison.
ranked_loci = {
    column: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
    for column, ascending in rank_ascending.items()
}
criteria = list(rank_ascending)

# All 15 pairwise comparisons: each criterion acts as the reference (second argument)
# for every criterion listed after it (first argument). The resulting order has to match
# the row labels in RBO_index_L / RBO_index_R, which are defined earlier in the notebook.
# Copies are passed so each call is independent even if calc_rbo modifies its inputs.
RBO_3 = [
    calc_rbo(ranked_loci[later].copy(), ranked_loci[reference].copy(), p)
    for position, reference in enumerate(criteria)
    for later in criteria[position + 1:]
]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_3, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.4

# Ranking criteria mapped to the `ascending` flag used when sorting RALL:
# 'PValue' and 'pvalue' are ranked ascending, every other column descending.
# The insertion order of this dict determines the order of the comparisons below.
rank_ascending = {
    'abs_iFC': False,
    'BF_21': False,
    'abs_FC_edgeR': False,
    'PValue': True,
    'abs_FC_DESeq2': False,
    'pvalue': True,
}

# Sort RALL once per criterion instead of once per comparison.
ranked_loci = {
    column: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
    for column, ascending in rank_ascending.items()
}
criteria = list(rank_ascending)

# All 15 pairwise comparisons: each criterion acts as the reference (second argument)
# for every criterion listed after it (first argument). The resulting order has to match
# the row labels in RBO_index_L / RBO_index_R, which are defined earlier in the notebook.
# Copies are passed so each call is independent even if calc_rbo modifies its inputs.
RBO_4 = [
    calc_rbo(ranked_loci[later].copy(), ranked_loci[reference].copy(), p)
    for position, reference in enumerate(criteria)
    for later in criteria[position + 1:]
]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_4, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.5

# Ranking criteria mapped to the `ascending` flag used when sorting RALL:
# 'PValue' and 'pvalue' are ranked ascending, every other column descending.
# The insertion order of this dict determines the order of the comparisons below.
rank_ascending = {
    'abs_iFC': False,
    'BF_21': False,
    'abs_FC_edgeR': False,
    'PValue': True,
    'abs_FC_DESeq2': False,
    'pvalue': True,
}

# Sort RALL once per criterion instead of once per comparison.
ranked_loci = {
    column: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
    for column, ascending in rank_ascending.items()
}
criteria = list(rank_ascending)

# All 15 pairwise comparisons: each criterion acts as the reference (second argument)
# for every criterion listed after it (first argument). The resulting order has to match
# the row labels in RBO_index_L / RBO_index_R, which are defined earlier in the notebook.
# Copies are passed so each call is independent even if calc_rbo modifies its inputs.
RBO_5 = [
    calc_rbo(ranked_loci[later].copy(), ranked_loci[reference].copy(), p)
    for position, reference in enumerate(criteria)
    for later in criteria[position + 1:]
]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_5, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.6

# Ranking criteria mapped to the `ascending` flag used when sorting RALL:
# 'PValue' and 'pvalue' are ranked ascending, every other column descending.
# The insertion order of this dict determines the order of the comparisons below.
rank_ascending = {
    'abs_iFC': False,
    'BF_21': False,
    'abs_FC_edgeR': False,
    'PValue': True,
    'abs_FC_DESeq2': False,
    'pvalue': True,
}

# Sort RALL once per criterion instead of once per comparison.
ranked_loci = {
    column: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
    for column, ascending in rank_ascending.items()
}
criteria = list(rank_ascending)

# All 15 pairwise comparisons: each criterion acts as the reference (second argument)
# for every criterion listed after it (first argument). The resulting order has to match
# the row labels in RBO_index_L / RBO_index_R, which are defined earlier in the notebook.
# Copies are passed so each call is independent even if calc_rbo modifies its inputs.
RBO_6 = [
    calc_rbo(ranked_loci[later].copy(), ranked_loci[reference].copy(), p)
    for position, reference in enumerate(criteria)
    for later in criteria[position + 1:]
]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_6, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.7

# Ranking criteria mapped to the `ascending` flag used when sorting RALL:
# 'PValue' and 'pvalue' are ranked ascending, every other column descending.
# The insertion order of this dict determines the order of the comparisons below.
rank_ascending = {
    'abs_iFC': False,
    'BF_21': False,
    'abs_FC_edgeR': False,
    'PValue': True,
    'abs_FC_DESeq2': False,
    'pvalue': True,
}

# Sort RALL once per criterion instead of once per comparison.
ranked_loci = {
    column: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
    for column, ascending in rank_ascending.items()
}
criteria = list(rank_ascending)

# All 15 pairwise comparisons: each criterion acts as the reference (second argument)
# for every criterion listed after it (first argument). The resulting order has to match
# the row labels in RBO_index_L / RBO_index_R, which are defined earlier in the notebook.
# Copies are passed so each call is independent even if calc_rbo modifies its inputs.
RBO_7 = [
    calc_rbo(ranked_loci[later].copy(), ranked_loci[reference].copy(), p)
    for position, reference in enumerate(criteria)
    for later in criteria[position + 1:]
]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_7, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.8

# Ranking criteria mapped to the `ascending` flag used when sorting RALL:
# 'PValue' and 'pvalue' are ranked ascending, every other column descending.
# The insertion order of this dict determines the order of the comparisons below.
rank_ascending = {
    'abs_iFC': False,
    'BF_21': False,
    'abs_FC_edgeR': False,
    'PValue': True,
    'abs_FC_DESeq2': False,
    'pvalue': True,
}

# Sort RALL once per criterion instead of once per comparison.
ranked_loci = {
    column: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
    for column, ascending in rank_ascending.items()
}
criteria = list(rank_ascending)

# All 15 pairwise comparisons: each criterion acts as the reference (second argument)
# for every criterion listed after it (first argument). The resulting order has to match
# the row labels in RBO_index_L / RBO_index_R, which are defined earlier in the notebook.
# Copies are passed so each call is independent even if calc_rbo modifies its inputs.
RBO_8 = [
    calc_rbo(ranked_loci[later].copy(), ranked_loci[reference].copy(), p)
    for position, reference in enumerate(criteria)
    for later in criteria[position + 1:]
]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_8, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.9

# Ranking criteria mapped to the `ascending` flag used when sorting RALL:
# 'PValue' and 'pvalue' are ranked ascending, every other column descending.
# The insertion order of this dict determines the order of the comparisons below.
rank_ascending = {
    'abs_iFC': False,
    'BF_21': False,
    'abs_FC_edgeR': False,
    'PValue': True,
    'abs_FC_DESeq2': False,
    'pvalue': True,
}

# Sort RALL once per criterion instead of once per comparison.
ranked_loci = {
    column: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
    for column, ascending in rank_ascending.items()
}
criteria = list(rank_ascending)

# All 15 pairwise comparisons: each criterion acts as the reference (second argument)
# for every criterion listed after it (first argument). The resulting order has to match
# the row labels in RBO_index_L / RBO_index_R, which are defined earlier in the notebook.
# Copies are passed so each call is independent even if calc_rbo modifies its inputs.
RBO_9 = [
    calc_rbo(ranked_loci[later].copy(), ranked_loci[reference].copy(), p)
    for position, reference in enumerate(criteria)
    for later in criteria[position + 1:]
]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_9, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
# RANK BASED COMPARISON PLOT
# One column of markers per p: every RBO score computed for that p is drawn at x = p.


fig = plt.figure(figsize=(6, 6), dpi=300)

ax = fig.add_subplot(1, 1, 1)  # Create an axes object

ax.set_xlim(-0.1, 1.1)
ax.set_ylim(-0.1, 1.1)

# Enable grid
ax.grid(True, which='both', linestyle='--', linewidth=0.5)

# RBO score lists keyed by the p they were computed with
rbo_scores_by_p = {
    0.1: RBO_1,
    0.2: RBO_2,
    0.3: RBO_3,
    0.4: RBO_4,
    0.5: RBO_5,
    0.6: RBO_6,
    0.7: RBO_7,
    0.8: RBO_8,
    0.9: RBO_9,
}

# the scatter plot: the x-array is sized from the score list itself, so the cell
# keeps working if the number of compared rankings changes
for p_position, rbo_scores in rbo_scores_by_p.items():
    ax.scatter(np.full(len(rbo_scores), p_position), rbo_scores, c='r', s=35,
               alpha=0.8, marker='x')

# identical tick positions on both axes
axis_ticks = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
ax.set_xticks(axis_ticks)
ax.set_yticks(axis_ticks)

ax.set_xlabel('p')
ax.set_ylabel('RBO')

plt.show()  # Don't forget to show the plot


In [None]:
# Display RALL ordered by BF_21, largest values first
RALL.sort_values(by='BF_21', ascending=False)