# P A C K A G E : : : C O M P A R I S O N

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Load the bayexpress results (all clean replicates included).
# The differential gene expression analysis itself was done in do_DGE.ipynb.
# The first CSV column is dropped by position and 'genes' is renamed to 'locus_name'.
bayexpress_csv = pd.read_csv('DGE_results/RALL_bayexpress.csv')
RALL_bayexpress = bayexpress_csv.iloc[:, 1:].rename(columns={'genes': "locus_name"})

# Per the original author's note, no genes are lost to NaNs here, so no
# dropna(subset=['BF', 'FC']) is applied.

# Significance criterion k (k = 0, 1, 2): (BF > 1) & (abs(FC) > k)
bf_above_one = RALL_bayexpress.BF > 1
abs_inferred_fc = RALL_bayexpress.FC.abs()
for fc_cutoff in (0, 1, 2):
    RALL_bayexpress[f'FC{fc_cutoff}_bayexpress'] = bf_above_one & (abs_inferred_fc > fc_cutoff)

RALL_bayexpress

Unnamed: 0,locus_name,BF,FC,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress
0,15S_rRNA,-3.710645,0.274047,False,False,False
1,21S_rRNA,-1.302642,0.147177,False,False,False
2,HRA1,-5.206545,-0.564787,False,False,False
3,ICR1,76.746125,0.504048,True,False,False
4,LSR1,0.451012,0.120243,False,False,False
...,...,...,...,...,...,...
7121,tY(GUA)J2,-7.312143,0.861839,False,False,False
7122,tY(GUA)M1,-7.406574,0.022303,False,False,False
7123,tY(GUA)M2,-7.727890,0.183767,False,False,False
7124,tY(GUA)O,-7.622253,0.347266,False,False,False


In [4]:
# Load the edgeR results (all clean replicates included).
# The differential gene expression analysis itself was done in do_DGE.ipynb.
edgeR_csv = pd.read_csv('DGE_results/RALL_edgeR.csv')
RALL_edgeR = edgeR_csv.rename(columns={'genes': "locus_name"})

# Per the original author's note, no genes are lost to NaNs here, so no
# dropna(subset=['logFC', 'PValue']) is applied.

# Significance criterion k (k = 0, 1, 2): (PValue < 0.05) & (abs(logFC) > k)
edgeR_p_below_cutoff = RALL_edgeR.PValue < 0.05
edgeR_abs_logfc = RALL_edgeR.logFC.abs()
for fc_cutoff in (0, 1, 2):
    RALL_edgeR[f'FC{fc_cutoff}_edgeR'] = edgeR_p_below_cutoff & (edgeR_abs_logfc > fc_cutoff)

RALL_edgeR

Unnamed: 0,locus_name,logFC,logCPM,F,PValue,FDR,FC0_edgeR,FC1_edgeR,FC2_edgeR
0,YIL121W,-2.833997,6.681355,1.393437e+04,2.275960e-99,1.386970e-95,True,True,True
1,YDR033W,-3.765504,8.923851,1.149128e+04,1.110106e-95,3.382494e-92,True,True,True
2,YML123C,-4.669706,9.152594,1.051245e+04,5.593728e-94,1.136273e-90,True,True,True
3,YGR234W,-4.212037,8.462599,9.288989e+03,1.292927e-91,1.969774e-88,True,True,True
4,YOR290C,-7.557956,6.530807,9.462461e+03,1.696224e-91,2.067358e-88,True,True,True
...,...,...,...,...,...,...,...,...,...
6089,YBL052C,0.000363,5.364961,4.246443e-05,9.948153e-01,9.954687e-01,False,False,False
6090,YHR117W,0.000097,6.355627,2.441982e-05,9.960682e-01,9.965588e-01,False,False,False
6091,YMR123W,-0.000171,6.143127,2.150691e-05,9.963102e-01,9.966373e-01,False,False,False
6092,YGR136W,0.000152,6.616432,1.272198e-05,9.971621e-01,9.973258e-01,False,False,False


In [5]:
# Load the DESeq2 results (all clean replicates included).
# The differential gene expression analysis itself was done in do_DGE.ipynb.
# The unnamed first CSV column holds the gene names and becomes 'locus_name'.
# Rows without a log2FoldChange or pvalue are dropped; per the original
# author's note this removes a lot of genes.
deseq2_csv = pd.read_csv('DGE_results/RALL_DESeq2.csv')
RALL_DESeq2 = (
    deseq2_csv
    .rename(columns={'Unnamed: 0': "locus_name"})
    .dropna(subset=['log2FoldChange', 'pvalue'])
)

# Significance criterion k (k = 0, 1, 2): (pvalue < 0.05) & (abs(log2FoldChange) > k)
deseq2_p_below_cutoff = RALL_DESeq2.pvalue < 0.05
deseq2_abs_l2fc = RALL_DESeq2.log2FoldChange.abs()
for fc_cutoff in (0, 1, 2):
    RALL_DESeq2[f'FC{fc_cutoff}_DESeq2'] = deseq2_p_below_cutoff & (deseq2_abs_l2fc > fc_cutoff)

RALL_DESeq2

Unnamed: 0,locus_name,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2
0,15S_rRNA,16.994794,-0.136618,0.340703,-0.400988,6.884290e-01,7.313923e-01,False,False,False
1,21S_rRNA,102.643528,-0.243675,0.289561,-0.841533,4.000494e-01,4.548869e-01,False,False,False
2,HRA1,2.550649,-0.851126,0.230477,-3.692895,2.217160e-04,3.651076e-04,True,False,False
3,ICR1,142.961822,0.229845,0.034777,6.609078,3.867198e-11,9.556948e-11,True,False,False
4,LSR1,196.640749,-0.214104,0.162418,-1.318227,1.874278e-01,2.269904e-01,False,False,False
...,...,...,...,...,...,...,...,...,...,...
7120,tY(GUA)J1,0.140228,-0.042322,1.074228,-0.039397,9.685735e-01,9.738411e-01,False,False,False
7121,tY(GUA)J2,0.133175,-0.042322,1.000548,-0.042298,9.662608e-01,9.724756e-01,False,False,False
7122,tY(GUA)M1,0.385050,-0.273708,0.488968,-0.559768,5.756379e-01,6.256143e-01,False,False,False
7123,tY(GUA)M2,0.080417,-0.301508,1.426535,-0.211357,8.326090e-01,8.622913e-01,False,False,False


In [6]:
# Build one comparison table with a row per bayexpress locus_name.
# Each package contributes its three significance calls plus its fold-change
# and evidence columns through a left merge on locus_name, so genes a package
# did not report keep NaN in that package's columns.
columns_per_package = [
    (RALL_bayexpress, ['FC0_bayexpress', 'FC1_bayexpress', 'FC2_bayexpress', 'FC', 'BF']),
    (RALL_edgeR, ['FC0_edgeR', 'FC1_edgeR', 'FC2_edgeR', 'logFC', 'PValue']),
    (RALL_DESeq2, ['FC0_DESeq2', 'FC1_DESeq2', 'FC2_DESeq2', 'log2FoldChange', 'pvalue']),
]

RALL = pd.DataFrame({'locus_name': RALL_bayexpress.locus_name})
for package_results, kept_columns in columns_per_package:
    RALL = RALL.merge(package_results[['locus_name'] + kept_columns], on='locus_name', how='left')

# Optional export, left disabled as in the original:
# RALL.to_csv('results_RALL.csv')

RALL

Unnamed: 0,locus_name,FC0_bayexpress,FC1_bayexpress,FC2_bayexpress,FC,BF,FC0_edgeR,FC1_edgeR,FC2_edgeR,logFC,PValue,FC0_DESeq2,FC1_DESeq2,FC2_DESeq2,log2FoldChange,pvalue
0,15S_rRNA,False,False,False,0.274047,-3.710645,,,,,,False,False,False,-0.136618,6.884290e-01
1,21S_rRNA,False,False,False,0.147177,-1.302642,False,False,False,-0.187417,5.227208e-01,False,False,False,-0.243675,4.000494e-01
2,HRA1,False,False,False,-0.564787,-5.206545,,,,,,True,False,False,-0.851126,2.217160e-04
3,ICR1,True,False,False,0.504048,76.746125,True,False,False,0.238044,1.313037e-09,True,False,False,0.229845,3.867198e-11
4,LSR1,False,False,False,0.120243,0.451012,False,False,False,-0.191748,2.362411e-01,False,False,False,-0.214104,1.874278e-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7121,tY(GUA)J2,False,False,False,0.861839,-7.312143,,,,,,False,False,False,-0.042322,9.662608e-01
7122,tY(GUA)M1,False,False,False,0.022303,-7.406574,,,,,,False,False,False,-0.273708,5.756379e-01
7123,tY(GUA)M2,False,False,False,0.183767,-7.727890,,,,,,False,False,False,-0.301508,8.326090e-01
7124,tY(GUA)O,False,False,False,0.347266,-7.622253,,,,,,False,False,False,-0.239053,8.377597e-01


In [None]:
# Example genes in the paper, looked up by locus name.
paper_example_genes = ['YAL016C-B', 'YDL062W', 'RPR1', 'YAL063C-A', 'YGR161W-C', 'YAL061W', '15S_rRNA']

RALL.set_index('locus_name').loc[paper_example_genes]

In [None]:
# Summary stats for the Venn diagram
# How many DEGs combined?
# DEG = 'statistically significant change' & | log2 fold change | > 0

# The ratios are computed from RALL instead of being typed in by hand, so they
# cannot drift from the table displayed below when the inputs are regenerated.
# `== True` turns the NaNs left by the left merges into False.
fc0_calls = RALL[['FC0_bayexpress', 'FC0_edgeR', 'FC0_DESeq2']] == True
fc0_in_all = fc0_calls.all(axis=1)
fc0_in_any = fc0_calls.any(axis=1)

print('% overlap all 3 of positive hits: ', fc0_in_all.sum() / fc0_in_any.sum())
# NOTE(review): the hand-typed denominator used to be 7126; the row count of
# RALL is used here -- confirm this is the intended total number of genes.
print('% overlap all 3 of total number of genes: ', fc0_in_all.sum() / len(RALL))

RALL.loc[fc0_in_any]

In [None]:
# bayexpress results are slightly different ... How different?
# DEG = 'statistically significant change' & | log2 fold change | > 0

# Computed from RALL (genes called by both / genes called by either) instead
# of hand-typed counts. `== True` turns merge NaNs into False.
fc0_edgeR_hit = RALL.FC0_edgeR == True
fc0_DESeq2_hit = RALL.FC0_DESeq2 == True
fc0_either = fc0_edgeR_hit | fc0_DESeq2_hit
fc0_both = fc0_edgeR_hit & fc0_DESeq2_hit

print('% overlap of DESeq2 and edgeR: ', fc0_both.sum() / fc0_either.sum())

display(RALL.loc[fc0_either])
display(RALL.loc[fc0_both])

In [None]:
# Summary stats for the Venn diagram
# How many DEGs combined?
# DEG = 'statistically significant change' & | log2 fold change | > 2

# The ratios are computed from RALL instead of being typed in by hand, so they
# cannot drift from the table displayed below when the inputs are regenerated.
# `== True` turns the NaNs left by the left merges into False.
fc2_calls = RALL[['FC2_bayexpress', 'FC2_edgeR', 'FC2_DESeq2']] == True
fc2_in_all = fc2_calls.all(axis=1)
fc2_in_any = fc2_calls.any(axis=1)

print('% overlap all 3 of positive hits: ', fc2_in_all.sum() / fc2_in_any.sum())
# NOTE(review): the hand-typed denominator used to be 7126; the row count of
# RALL is used here -- confirm this is the intended total number of genes.
print('% overlap all 3 of total number of genes: ', fc2_in_all.sum() / len(RALL))

RALL.loc[fc2_in_any]

In [None]:
# bayexpress results are slightly different ... How different?
# DEG = 'statistically significant change' & | log2 fold change | > 2

# Computed from RALL (genes called by both / genes called by either) instead
# of hand-typed counts. `== True` turns merge NaNs into False.
fc2_edgeR_hit = RALL.FC2_edgeR == True
fc2_DESeq2_hit = RALL.FC2_DESeq2 == True
fc2_either = fc2_edgeR_hit | fc2_DESeq2_hit
fc2_both = fc2_edgeR_hit & fc2_DESeq2_hit

print('% overlap of DESeq2 and edgeR: ', fc2_both.sum() / fc2_either.sum())

RALL.loc[fc2_either]

In [None]:
# Summary stats for the Venn diagram
# How many DEGs combined?
# DEG = 'statistically significant change' & | log2 fold change | > 1

# The ratios are computed from RALL instead of being typed in by hand, so they
# cannot drift from the table displayed below when the inputs are regenerated.
# `== True` turns the NaNs left by the left merges into False.
fc1_calls = RALL[['FC1_bayexpress', 'FC1_edgeR', 'FC1_DESeq2']] == True
fc1_in_all = fc1_calls.all(axis=1)
fc1_in_any = fc1_calls.any(axis=1)

print('% overlap all 3 of positive hits: ', fc1_in_all.sum() / fc1_in_any.sum())
# NOTE(review): the hand-typed denominator used to be 7126; the row count of
# RALL is used here -- confirm this is the intended total number of genes.
print('% overlap all 3 of total number of genes: ', fc1_in_all.sum() / len(RALL))

RALL.loc[fc1_in_any]

In [None]:
# Summary stats for Venn diagram
# bayexpress results are slightly different ... How different?
# DEG = 'statistically significant change' & | log2 fold change | > 1

# Computed from RALL (genes called by both / genes called by either) instead
# of hand-typed counts. `== True` turns merge NaNs into False.
fc1_edgeR_hit = RALL.FC1_edgeR == True
fc1_DESeq2_hit = RALL.FC1_DESeq2 == True
fc1_either = fc1_edgeR_hit | fc1_DESeq2_hit
fc1_both = fc1_edgeR_hit & fc1_DESeq2_hit

print('% overlap of DESeq2 and edgeR: ', fc1_both.sum() / fc1_either.sum())

RALL.loc[fc1_either]

In [None]:
# Rows of RALL called DEG by all three packages.
# DEG = 'statistically significant change' & | log2 fold change | > 1
# Comparing with True maps merge NaNs to False.
fc1_called = RALL[['FC1_bayexpress', 'FC1_edgeR', 'FC1_DESeq2']] == True

RALL.loc[fc1_called.all(axis=1)]

In [None]:
# Rows of RALL called DEG by both edgeR and DESeq2.
# DEG = 'statistically significant change' & | log2 fold change | > 1
in_edgeR_and_DESeq2 = RALL.FC1_edgeR.eq(True) & RALL.FC1_DESeq2.eq(True)

RALL.loc[in_edgeR_and_DESeq2]

In [None]:
# Rows of RALL called DEG by both bayexpress and DESeq2.
# DEG = 'statistically significant change' & | log2 fold change | > 1
in_bayexpress_and_DESeq2 = RALL.FC1_bayexpress.eq(True) & RALL.FC1_DESeq2.eq(True)

RALL.loc[in_bayexpress_and_DESeq2]

In [None]:
# Rows of RALL called DEG by both bayexpress and edgeR.
# DEG = 'statistically significant change' & | log2 fold change | > 1
in_bayexpress_and_edgeR = RALL.FC1_bayexpress.eq(True) & RALL.FC1_edgeR.eq(True)

RALL.loc[in_bayexpress_and_edgeR]

In [None]:
# Load the counts table, indexed by gene name, to inspect the genes on which
# the packages disagree.
# (q-plots for some of them were made in examples_genes.ipynb.)
counts_table = pd.read_csv('RALL.csv')
counts_RALL = counts_table.set_index('genes')

In [None]:
# Counts for genes that are FC1-positive in bayexpress but not in the other two.
# A merge NaN counts as "not positive".
bayexpress_hit = RALL.FC1_bayexpress == True
edgeR_hit = RALL.FC1_edgeR == True
DESeq2_hit = RALL.FC1_DESeq2 == True

bayexpress_only = bayexpress_hit & ~edgeR_hit & ~DESeq2_hit
counts_RALL.loc[RALL.loc[bayexpress_only, 'locus_name'].tolist()]

In [None]:
# Counts for genes that are FC1-positive in edgeR but not in the other two.
# A merge NaN counts as "not positive".
bayexpress_hit = RALL.FC1_bayexpress == True
edgeR_hit = RALL.FC1_edgeR == True
DESeq2_hit = RALL.FC1_DESeq2 == True

edgeR_only = ~bayexpress_hit & edgeR_hit & ~DESeq2_hit
counts_RALL.loc[RALL.loc[edgeR_only, 'locus_name'].tolist()]

In [None]:
# Counts for genes that are FC1-positive in DESeq2 but not in the other two.
# A merge NaN counts as "not positive".
bayexpress_hit = RALL.FC1_bayexpress == True
edgeR_hit = RALL.FC1_edgeR == True
DESeq2_hit = RALL.FC1_DESeq2 == True

DESeq2_only = ~bayexpress_hit & ~edgeR_hit & DESeq2_hit
counts_RALL.loc[RALL.loc[DESeq2_only, 'locus_name'].tolist()]

In [None]:
# Counts for genes that are FC1-positive in DESeq2 and bayexpress but not in edgeR.
# A merge NaN counts as "not positive".
bayexpress_hit = RALL.FC1_bayexpress == True
edgeR_hit = RALL.FC1_edgeR == True
DESeq2_hit = RALL.FC1_DESeq2 == True

missed_by_edgeR = bayexpress_hit & ~edgeR_hit & DESeq2_hit
counts_RALL.loc[RALL.loc[missed_by_edgeR, 'locus_name'].tolist()]

In [None]:
# Counts for genes that are FC1-positive in edgeR and bayexpress but not in DESeq2.
# A merge NaN counts as "not positive".
bayexpress_hit = RALL.FC1_bayexpress == True
edgeR_hit = RALL.FC1_edgeR == True
DESeq2_hit = RALL.FC1_DESeq2 == True

missed_by_DESeq2 = bayexpress_hit & edgeR_hit & ~DESeq2_hit
counts_RALL.loc[RALL.loc[missed_by_DESeq2, 'locus_name'].tolist()]

In [None]:
# Counts for genes that are FC1-positive in all three packages.
called_by_all = (RALL[['FC1_bayexpress', 'FC1_edgeR', 'FC1_DESeq2']] == True).all(axis=1)
counts_RALL.loc[RALL.loc[called_by_all, 'locus_name'].tolist()]

# some summary statistics


In [None]:
# Counts for genes that no package calls FC1-positive.
# A merge NaN counts as "not positive".
called_by_any = (RALL[['FC1_bayexpress', 'FC1_edgeR', 'FC1_DESeq2']] == True).any(axis=1)
counts_RALL.loc[RALL.loc[~called_by_any, 'locus_name'].tolist()]

In [None]:
# bayexpress results (BF, FC and calls) for the genes that are FC1-positive
# in DESeq2 but not in the other two packages.
bayexpress_hit = RALL.FC1_bayexpress == True
edgeR_hit = RALL.FC1_edgeR == True
DESeq2_hit = RALL.FC1_DESeq2 == True

DESeq2_only_genes = RALL.loc[~bayexpress_hit & ~edgeR_hit & DESeq2_hit, 'locus_name'].tolist()
RALL_bayexpress.set_index('locus_name').loc[DESeq2_only_genes]


In [None]:
# Counts for genes that are FC1-positive in DESeq2 and edgeR but not in bayexpress.
bayexpress_hit = RALL.FC1_bayexpress == True
edgeR_hit = RALL.FC1_edgeR == True
DESeq2_hit = RALL.FC1_DESeq2 == True

missed_by_bayexpress = ~bayexpress_hit & edgeR_hit & DESeq2_hit
counts_RALL.loc[RALL.loc[missed_by_bayexpress, 'locus_name'].tolist()]

# oh no, let's check out what's going on here in the bayexpress results

In [None]:
# bayexpress results for the genes that are FC1-positive in DESeq2 and edgeR
# but not in bayexpress, to see why bayexpress does not call them.
bayexpress_hit = RALL.FC1_bayexpress == True
edgeR_hit = RALL.FC1_edgeR == True
DESeq2_hit = RALL.FC1_DESeq2 == True

missed_by_bayexpress_genes = RALL.loc[~bayexpress_hit & edgeR_hit & DESeq2_hit, 'locus_name'].tolist()
RALL_bayexpress.set_index('locus_name').loc[missed_by_bayexpress_genes]

# okay looks like it's a close race with the FC cutoffs

In [None]:
# Display the merged comparison table.
RALL

In [None]:
# Descriptive statistics of the merged comparison table (DataFrame.describe()).
RALL.describe()

In [None]:
# Rows of RALL called DEG by bayexpress only.
# DEG = 'statistically significant change' & | log2 fold change | > 1
# A merge NaN in the edgeR / DESeq2 columns counts as "not DEG".
called_by_edgeR_or_DESeq2 = (RALL.FC1_edgeR == True) | (RALL.FC1_DESeq2 == True)

RALL.loc[(RALL.FC1_bayexpress == True) & ~called_by_edgeR_or_DESeq2]

In [None]:
# Tally True / False / NaN per package for the FC1 call (watch the NaNs!).
for fc1_column in ('FC1_bayexpress', 'FC1_edgeR', 'FC1_DESeq2'):
    display(RALL[fc1_column].value_counts(dropna=False))

In [None]:
# What are all the NaNs in DESeq2?
# Rows are looked up by locus name, like the other counts_RALL lookups in this
# notebook. The previous boolean list built from RALL was applied to
# counts_RALL purely by position, which only selects the right genes when both
# tables have the same length and row order.
counts_RALL.loc[list(RALL.loc[RALL.FC1_DESeq2.isna()].locus_name)]

# well, fair enough

In [None]:
# What are all the NaNs in edgeR?
# Rows are looked up by locus name, like the other counts_RALL lookups in this
# notebook. The previous boolean list built from RALL was applied to
# counts_RALL purely by position, which only selects the right genes when both
# tables have the same length and row order.
counts_RALL.loc[list(RALL.loc[RALL.FC1_edgeR.isna()].locus_name)]

# hmm, interesting

In [None]:
from matplotlib import pyplot as plt
from matplotlib_venn import venn3, venn3_circles

# Colour-blind-safe palette: https://davidmathlogic.com/colorblind/#%23332288-%23117733-%2344AA99-%2388CCEE-%23DDCC77-%23CC6677-%23AA4499-%23882255

plt.figure(figsize=(3,3), dpi=300)

# Locus names each package calls DEG at | log2 fold change | > 2, in the order
# bayexpress, edgeR, DESeq2. Built once and shared by the filled diagram and
# the circle outlines.
fc2_gene_sets = [
    set(RALL.loc[RALL[fc2_column] == True, 'locus_name'])
    for fc2_column in ('FC2_bayexpress', 'FC2_edgeR', 'FC2_DESeq2')
]

v = venn3(
    fc2_gene_sets,
    set_labels=('bayexpress', 'edgeR', 'DESeq2'),
    set_colors=("#332288", "#882255", '#DDCC77'),
    alpha=0.6,
)

# linewidth=0.0 keeps the circle outlines invisible.
venn3_circles(fc2_gene_sets, linestyle="-", linewidth=0.0)

plt.show()

# exported for paper as FC2.png

In [None]:
from matplotlib import pyplot as plt
from matplotlib_venn import venn3, venn3_circles

# Colour-blind-safe palette: https://davidmathlogic.com/colorblind/#%23332288-%23117733-%2344AA99-%2388CCEE-%23DDCC77-%23CC6677-%23AA4499-%23882255

plt.figure(figsize=(3,3), dpi=300)

# Locus names each package calls DEG at | log2 fold change | > 1, in the order
# bayexpress, edgeR, DESeq2. Built once and shared by the filled diagram and
# the circle outlines.
fc1_gene_sets = [
    set(RALL.loc[RALL[fc1_column] == True, 'locus_name'])
    for fc1_column in ('FC1_bayexpress', 'FC1_edgeR', 'FC1_DESeq2')
]

v = venn3(
    fc1_gene_sets,
    set_labels=('bayexpress', 'edgeR', 'DESeq2'),
    set_colors=("#332288", "#882255", '#DDCC77'),
    alpha=0.6,
)

# linewidth=0.0 keeps the circle outlines invisible.
venn3_circles(fc1_gene_sets, linestyle="-", linewidth=0.0)

plt.show()
# exported for paper as FC1.png

In [None]:
from matplotlib import pyplot as plt
from matplotlib_venn import venn3, venn3_circles

# Colour-blind-safe palette: https://davidmathlogic.com/colorblind/#%23332288-%23117733-%2344AA99-%2388CCEE-%23DDCC77-%23CC6677-%23AA4499-%23882255

plt.figure(figsize=(3,3), dpi=300)

# Locus names each package calls DEG at | log2 fold change | > 0, in the order
# bayexpress, edgeR, DESeq2. Built once and shared by the filled diagram and
# the circle outlines.
fc0_gene_sets = [
    set(RALL.loc[RALL[fc0_column] == True, 'locus_name'])
    for fc0_column in ('FC0_bayexpress', 'FC0_edgeR', 'FC0_DESeq2')
]

v = venn3(
    fc0_gene_sets,
    set_labels=('bayexpress', 'edgeR', 'DESeq2'),
    set_colors=("#332288", "#882255", '#DDCC77'),
    alpha=0.6,
)

# linewidth=0.0 keeps the circle outlines invisible.
venn3_circles(fc0_gene_sets, linestyle="-", linewidth=0.0)

plt.show()
# exported for paper as FC0.png

In [None]:
# Counts for genes that are FC2-positive in bayexpress but not in the other two.
# DEG = 'statistically significant change' & | log2 fold change | > 2
fc2_bayexpress_hit = RALL.FC2_bayexpress == True
fc2_edgeR_hit = RALL.FC2_edgeR == True
fc2_DESeq2_hit = RALL.FC2_DESeq2 == True

fc2_bayexpress_only = fc2_bayexpress_hit & ~fc2_edgeR_hit & ~fc2_DESeq2_hit
display(counts_RALL.loc[RALL.loc[fc2_bayexpress_only, 'locus_name'].tolist()])


In [None]:
# Counts for genes that are FC2-positive in DESeq2 and edgeR but not in bayexpress.
# DEG = 'statistically significant change' & | log2 fold change | > 2
fc2_bayexpress_hit = RALL.FC2_bayexpress == True
fc2_edgeR_hit = RALL.FC2_edgeR == True
fc2_DESeq2_hit = RALL.FC2_DESeq2 == True

fc2_missed_by_bayexpress = ~fc2_bayexpress_hit & fc2_edgeR_hit & fc2_DESeq2_hit
display(counts_RALL.loc[RALL.loc[fc2_missed_by_bayexpress, 'locus_name'].tolist()])


In [None]:
# Column key: FC = bayexpress, logFC = edgeR, log2FoldChange = DESeq2
# Scatter of bayexpress vs. DESeq2 fold changes with a marginal histogram per axis.

x_values, y_values = RALL.FC, RALL.log2FoldChange
axis_range = (-8, 8)
marginal_style = dict(bins=50, histtype='step', alpha=0.8, linewidth=2)

fig = plt.figure(figsize=(6, 6), dpi=300)

gs = fig.add_gridspec(
    2, 2,
    width_ratios=(4, 1), height_ratios=(1, 4),
    left=0.1, right=0.9, bottom=0.1, top=0.9,
    wspace=0.05, hspace=0.05,
)

# Main panel bottom-left; the top and right panels share its x and y axis.
ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)

ax.grid()
ax_histx.tick_params(axis="x", labelbottom=False)
ax_histy.tick_params(axis="y", labelleft=False)

ax.set_xlim(*axis_range)
ax.set_ylim(*axis_range)

# the scatter plot
ax.scatter(x_values, y_values, c='#44AA99', s=300, alpha=1, marker='+')

# marginal distributions
ax_histx.hist(x_values, color='#882255', orientation='vertical', **marginal_style)
ax_histy.hist(y_values, color='#332288', orientation='horizontal', **marginal_style)

# dashed diagonal from (-8, -8) to (8, 8)
ax.plot(list(axis_range), list(axis_range), ls='--', c='black')

for marginal_ax in (ax_histx, ax_histy):
    marginal_ax.grid()

ax.set_xlabel('log2 fold change (Bayesian framework)')
ax.set_ylabel('log2 fold change (DESeq2)')


In [None]:
# Rows of RALL for genes whose counts are > 0 in every column of counts_RALL.
# Genes are matched by locus name; the previous version took row positions from
# counts_RALL and applied them to RALL with iloc, which is only right when both
# tables have identical length and row order.
RALL.loc[RALL.locus_name.isin(counts_RALL.index[(counts_RALL > 0).all(axis=1)])]

In [None]:
# Let's check if the non-diagonal points are n = 0 genes
# Here we filter out all genes where at least one replicate has 0 reads mapping to the gene

# FC = bayexpress
# logFC = edgeR
# log2FoldChange = DESeq2

# Genes with counts > 0 in every column of counts_RALL, matched to RALL by
# locus name (not by row position) and computed once for the whole figure.
nonzero_genes = counts_RALL.index[(counts_RALL > 0).all(axis=1)]
RALL_nonzero = RALL.loc[RALL.locus_name.isin(nonzero_genes)]

marginal_style = dict(bins=50, histtype='step', alpha=0.8, linewidth=2)

fig = plt.figure(figsize=(6, 6), dpi=300)

gs = fig.add_gridspec(
    2, 2,
    width_ratios=(4, 1), height_ratios=(1, 4),
    left=0.1, right=0.9, bottom=0.1, top=0.9,
    wspace=0.05, hspace=0.05,
)

# Main panel bottom-left; the top and right panels share its x and y axis.
ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)

ax.grid()
ax_histx.tick_params(axis="x", labelbottom=False)
ax_histy.tick_params(axis="y", labelleft=False)

ax.set_xlim(-8, 8)
ax.set_ylim(-8, 8)

# the scatter plot:
ax.scatter(RALL_nonzero.FC, RALL_nonzero.log2FoldChange, c='#44AA99', s=300, alpha=1, marker='+')

# The marginal histograms use the same filtered genes as the scatter, so they
# describe the points that are actually drawn (they previously showed all genes).
ax_histx.hist(RALL_nonzero.FC, color='#882255', orientation='vertical', **marginal_style)
ax_histy.hist(RALL_nonzero.log2FoldChange, color='#332288', orientation='horizontal', **marginal_style)

# Add a diagonal line from (-8, -8) to (8, 8)
ax.plot([-8, 8], [-8, 8], ls='--', c='black')

ax_histx.grid()
ax_histy.grid()

ax.set_xlabel('log2 fold change (Bayesian framework)')
ax.set_ylabel('log2 fold change (DESeq2)')

In [None]:
# Column key: FC = bayexpress, logFC = edgeR, log2FoldChange = DESeq2
# Scatter of bayexpress vs. edgeR fold changes with a marginal histogram per axis.

x_values, y_values = RALL.FC, RALL.logFC
axis_range = (-8, 8)
marginal_style = dict(bins=50, histtype='step', alpha=0.8, linewidth=2)

fig = plt.figure(figsize=(6, 6), dpi=300)

gs = fig.add_gridspec(
    2, 2,
    width_ratios=(4, 1), height_ratios=(1, 4),
    left=0.1, right=0.9, bottom=0.1, top=0.9,
    wspace=0.05, hspace=0.05,
)

# Main panel bottom-left; the top and right panels share its x and y axis.
ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)

ax.grid()
ax_histx.tick_params(axis="x", labelbottom=False)
ax_histy.tick_params(axis="y", labelleft=False)

ax.set_xlim(*axis_range)
ax.set_ylim(*axis_range)

# the scatter plot
ax.scatter(x_values, y_values, c='#44AA99', s=300, alpha=1, marker='+')

# marginal distributions
ax_histx.hist(x_values, color='#882255', orientation='vertical', **marginal_style)
ax_histy.hist(y_values, color='#117733', orientation='horizontal', **marginal_style)

# dashed diagonal from (-8, -8) to (8, 8)
ax.plot(list(axis_range), list(axis_range), ls='--', c='black')

for marginal_ax in (ax_histx, ax_histy):
    marginal_ax.grid()

ax.set_xlabel('log2 fold change (Bayesian framework)')
ax.set_ylabel('log2 fold change (edgeR)')


In [None]:
# Column key: FC = bayexpress, logFC = edgeR, log2FoldChange = DESeq2
# Scatter of DESeq2 vs. edgeR fold changes with a marginal histogram per axis.

x_values, y_values = RALL.log2FoldChange, RALL.logFC
axis_range = (-8, 8)
marginal_style = dict(bins=50, histtype='step', alpha=0.8, linewidth=2)

fig = plt.figure(figsize=(6, 6), dpi=300)

gs = fig.add_gridspec(
    2, 2,
    width_ratios=(4, 1), height_ratios=(1, 4),
    left=0.1, right=0.9, bottom=0.1, top=0.9,
    wspace=0.05, hspace=0.05,
)

# Main panel bottom-left; the top and right panels share its x and y axis.
ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)

ax.grid()
ax_histx.tick_params(axis="x", labelbottom=False)
ax_histy.tick_params(axis="y", labelleft=False)

ax.set_xlim(*axis_range)
ax.set_ylim(*axis_range)

# the scatter plot
ax.scatter(x_values, y_values, c='#44AA99', s=300, alpha=1, marker='+')

# marginal distributions
ax_histx.hist(x_values, color='#332288', orientation='vertical', **marginal_style)
ax_histy.hist(y_values, color='#117733', orientation='horizontal', **marginal_style)

# dashed diagonal from (-8, -8) to (8, 8)
ax.plot(list(axis_range), list(axis_range), ls='--', c='black')

for marginal_ax in (ax_histx, ax_histy):
    marginal_ax.grid()

ax.set_xlabel('log2 fold change (DESeq2)')
ax.set_ylabel('log2 fold change (edgeR)')


In [None]:
# Checking the ones where the fold change doesn't match.
# The mismatch is taken as an absolute difference so that disagreements in both
# directions are listed; the previous one-sided test (FC - log2FoldChange > 0.5)
# skipped genes where DESeq2 reports the larger value.
# NOTE(review): revert to the one-sided test if only FC > log2FoldChange was intended.
fc_mismatch = (RALL.FC - RALL.log2FoldChange).abs() > 0.5

display(RALL.loc[fc_mismatch])

counts_RALL.loc[list(RALL.loc[fc_mismatch].locus_name)]

# Hah, okay that's Laplace?

# And no deviation with edgeR is explained as they filter out genes with zeros


In [None]:
# Display the merged comparison table.
RALL

In [None]:
# Genes with a very low inferred fold change (|FC| < 0.5) but a high Bayes factor (BF > 100).
small_fold_change = RALL.FC.abs() < 0.5
strong_evidence = RALL.BF > 100

RALL.loc[small_fold_change & strong_evidence]


In [None]:
# Genes with a very low Bayes factor (BF < 1) but a high inferred fold change (|FC| > 2).
large_fc_weak_evidence = RALL.loc[(RALL.FC.abs() > 2) & (RALL.BF < 1)]

display(large_fc_weak_evidence)

print(list(large_fc_weak_evidence.locus_name))


# very very low read numbers for those genes

In [None]:
# P-values (DESeq2) vs. Bayes factors
# RALL.pvalue is the DESeq2 p-value column (the edgeR one is RALL.PValue).

marginal_style = dict(bins=50, histtype='step', alpha=0.8, linewidth=2)

fig = plt.figure(figsize=(6, 6), dpi=300)

gs = fig.add_gridspec(
    2, 2,
    width_ratios=(4, 1), height_ratios=(1, 4),
    left=0.1, right=0.9, bottom=0.1, top=0.9,
    wspace=0.05, hspace=0.05,
)

# Main panel bottom-left; the top and right panels share its x and y axis.
ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)

ax.grid()
ax_histx.tick_params(axis="x", labelbottom=False)
ax_histy.tick_params(axis="y", labelleft=False)

# Only the Bayes-factor axis is clipped; the p-value axis keeps its automatic limits.
ax.set_ylim(-8, 100)

# the scatter plot:
ax.scatter(RALL.pvalue, RALL.BF, c='black', s=30, alpha=0.4, marker='x')

# marginal distributions
ax_histx.hist(RALL.pvalue, color='#882255', orientation='vertical', **marginal_style)
ax_histy.hist(RALL.BF, color='#117733', orientation='horizontal', **marginal_style)

ax_histx.grid()
ax_histy.grid()

# Fixed: the x axis shows DESeq2 p-values but was labelled 'p-value (edgeR)'.
ax.set_xlabel('p-value (DESeq2)')
ax.set_ylabel('Bayes factor')


In [None]:
# P-values (edgeR) vs. Bayes factors
# RALL.PValue is the edgeR p-value column.

x_values, y_values = RALL.PValue, RALL.BF
marginal_style = dict(bins=50, histtype='step', alpha=0.8, linewidth=2)

fig = plt.figure(figsize=(6, 6), dpi=300)

gs = fig.add_gridspec(
    2, 2,
    width_ratios=(4, 1), height_ratios=(1, 4),
    left=0.1, right=0.9, bottom=0.1, top=0.9,
    wspace=0.05, hspace=0.05,
)

# Main panel bottom-left; the top and right panels share its x and y axis.
ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)

ax.grid()
ax_histx.tick_params(axis="x", labelbottom=False)
ax_histy.tick_params(axis="y", labelleft=False)

# Only the Bayes-factor axis is clipped; the p-value axis keeps its automatic limits.
ax.set_ylim(-8, 100)

# the scatter plot
ax.scatter(x_values, y_values, c='black', s=30, alpha=0.4, marker='x')

# marginal distributions
ax_histx.hist(x_values, color='#882255', orientation='vertical', **marginal_style)
ax_histy.hist(y_values, color='#117733', orientation='horizontal', **marginal_style)

for marginal_ax in (ax_histx, ax_histy):
    marginal_ax.grid()

ax.set_xlabel('p-value (edgeR)')
ax.set_ylabel('Bayes factor')


In [None]:
# P-values (edgeR) vs. p-values (DESeq2)
# RALL.PValue is the edgeR column (x axis); RALL.pvalue is the DESeq2 column (y axis).

marginal_style = dict(bins=50, histtype='step', alpha=0.8, linewidth=2)

fig = plt.figure(figsize=(6, 6), dpi=300)

gs = fig.add_gridspec(
    2, 2,
    width_ratios=(4, 1), height_ratios=(1, 4),
    left=0.1, right=0.9, bottom=0.1, top=0.9,
    wspace=0.05, hspace=0.05,
)

# Main panel bottom-left; the top and right panels share its x and y axis.
ax = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)

ax.grid()
ax_histx.tick_params(axis="x", labelbottom=False)
ax_histy.tick_params(axis="y", labelleft=False)

# Both axes keep their automatic limits.

# the scatter plot:
ax.scatter(RALL.PValue, RALL.pvalue, c='black', s=30, alpha=0.4, marker='x')

# marginal distributions
ax_histx.hist(RALL.PValue, color='#882255', orientation='vertical', **marginal_style)
ax_histy.hist(RALL.pvalue, color='#117733', orientation='horizontal', **marginal_style)

ax_histx.grid()
ax_histy.grid()

# Fixed: the two axis labels were swapped relative to the plotted columns.
ax.set_xlabel('p-value (edgeR)')
ax.set_ylabel('p-value (DESeq2)')


In [None]:
# Genes with a very high Bayes factor (BF > 100) and a high DESeq2 p-value (> 0.7).
strong_bf_weak_pvalue = RALL.loc[(RALL.pvalue.abs() > 0.7) & (RALL.BF > 100)]

display(strong_bf_weak_pvalue)

print(list(strong_bf_weak_pvalue.locus_name))


## RANK BASED COMPARISON

In [None]:
def calc_rbo(ranked_list_1, ranked_list_2, p):
    """Rank-biased overlap (extrapolated form) between two ranked lists.

    The lists may have different lengths; the shorter one is treated as the
    "short" list (s) and the other as the "long" list (l). With X_d the number
    of items shared by the first d entries of both lists, the value returned is

        ((1 - p) / p) * (term_1 + term_2) + term_3

        term_1 = sum_{d=1}^{l}   (X_d / d) * p^d
        term_2 = sum_{d=s+1}^{l} (X_s * (d - s) / (s * d)) * p^d
        term_3 = ((X_l - X_s) / l + X_s / s) * p^l

    Parameters
    ----------
    ranked_list_1, ranked_list_2 : sequence of hashable items
        Rankings to compare, best-ranked item first.
    p : float
        Weighting parameter, strictly between 0 and 1. Smaller values put
        more weight on the top of the rankings.

    Returns
    -------
    float
        The overlap score (1.0 for identical rankings, 0.0 for disjoint ones).
    str
        For invalid input the function does not raise; it returns a message
        describing the problem (p outside (0, 1), or an empty list). This
        historical behaviour is kept so existing callers are unaffected.
    """
    # Explicit checks instead of `assert`: assert statements are removed when
    # Python runs with -O, which would silently disable the validation.
    if not 0 < p < 1:
        return f"p = {p}, p should be (0,1)."

    if not (len(ranked_list_2) > 0 and len(ranked_list_1) > 0):
        return f"Error: Empty list supplied [{len(ranked_list_1)}, {len(ranked_list_2)}]"

    # determine the corresponding shorter and longer lists
    # (on equal lengths the second list plays the "short" role)
    if len(ranked_list_1) < len(ranked_list_2):
        short_list, long_list = ranked_list_1, ranked_list_2
    else:
        short_list, long_list = ranked_list_2, ranked_list_1
    short_list_len, long_list_len = len(short_list), len(long_list)  # s, l

    # X_d for d = 1..l. The overlap is updated incrementally as each prefix
    # grows by one item, instead of re-intersecting the two prefix sets at
    # every depth (which is quadratic in the list length).
    short_seen = set()
    long_seen = set()
    overlap = 0
    intersections = {}  # {d: X_d}
    for i in range(long_list_len):
        item = long_list[i]
        if item not in long_seen:
            long_seen.add(item)
            if item in short_seen:
                overlap += 1
        if i < short_list_len:
            item = short_list[i]
            if item not in short_seen:
                short_seen.add(item)
                if item in long_seen:
                    overlap += 1
        intersections[i + 1] = float(overlap)

    # term 1
    # \sum_{d=1}^{l} (X_{d}/d)*p^{d}
    # d = i+1, because Python indexes from 0
    term_1 = sum(intersections[i+1]/(i+1)*pow(p,(i+1)) for i in range(long_list_len))

    # term 2
    # \sum_{d=s+1}^{l} ((X_{s}(d-s)/sd)*p^{d})
    Xs = intersections[short_list_len]
    Xl = intersections[long_list_len]
    term_2 = sum(((Xs * ( (i+1) - short_list_len)) / (short_list_len*(i+1)) * pow(p, (i+1) )) for i in range(short_list_len, long_list_len))

    # term 3
    # [(X_{l} - X_{s}) / l + X_{s} / s] * p^{l}
    term_3 = ((Xl - Xs) / (long_list_len) + (Xs) / (short_list_len) ) * pow(p, long_list_len)

    # RBO = ((1-p)/p)*(term_1 + term_2) + term_3
    return ((1-p)/p)*(term_1 +term_2) + term_3

In [None]:
# Absolute fold changes, so genes can be ranked by effect size regardless of direction.
# Maps each new column to the signed fold-change column it is derived from.
for abs_column, signed_column in (('abs_iFC', 'FC'),
                                  ('abs_FC_edgeR', 'logFC'),
                                  ('abs_FC_DESeq2', 'log2FoldChange')):
    RALL[abs_column] = np.abs(RALL[signed_column].to_numpy())


In [None]:
# Labels for the 15 pairwise ranking comparisons (6 metrics, every unordered pair once).
# For each pair, the metric that comes later in `rbo_metrics` goes into RBO_index_L
# and the earlier one into RBO_index_R; pairs are ordered by the earlier metric first.
rbo_metrics = ['iFC', 'BF_21', 'FC_edgeR', 'p_edgeR', 'FC_DESeq2', 'p_DESeq2']

rbo_pairs = [(later, earlier)
             for position, earlier in enumerate(rbo_metrics)
             for later in rbo_metrics[position + 1:]]

RBO_index_L = [left for left, _ in rbo_pairs]

RBO_index_R = [right for _, right in rbo_pairs]

In [None]:
p = 0.1

# Metric label -> (RALL column to rank by, ascending?).
# Bayes factor and absolute fold changes rank largest first, p-values smallest first.
rank_spec = {
    'iFC': ('abs_iFC', False),
    'BF_21': ('BF', False),
    'FC_edgeR': ('abs_FC_edgeR', False),
    'p_edgeR': ('PValue', True),
    'FC_DESeq2': ('abs_FC_DESeq2', False),
    'p_DESeq2': ('pvalue', True),
}

# Sort RALL once per metric instead of once per comparison.
rankings = {label: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
            for label, (column, ascending) in rank_spec.items()}

# One RBO per labelled pair; driving the loop from RBO_index_L / RBO_index_R
# keeps the scores and their labels aligned by construction.
RBO_1 = [calc_rbo(rankings[left], rankings[right], p)
         for left, right in zip(RBO_index_L, RBO_index_R)]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_1, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.2

# Metric label -> (RALL column to rank by, ascending?).
# Bayes factor and absolute fold changes rank largest first, p-values smallest first.
rank_spec = {
    'iFC': ('abs_iFC', False),
    'BF_21': ('BF', False),
    'FC_edgeR': ('abs_FC_edgeR', False),
    'p_edgeR': ('PValue', True),
    'FC_DESeq2': ('abs_FC_DESeq2', False),
    'p_DESeq2': ('pvalue', True),
}

# Sort RALL once per metric instead of once per comparison.
rankings = {label: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
            for label, (column, ascending) in rank_spec.items()}

# One RBO per labelled pair; driving the loop from RBO_index_L / RBO_index_R
# keeps the scores and their labels aligned by construction.
RBO_2 = [calc_rbo(rankings[left], rankings[right], p)
         for left, right in zip(RBO_index_L, RBO_index_R)]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_2, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.3

# Metric label -> (RALL column to rank by, ascending?).
# Bayes factor and absolute fold changes rank largest first, p-values smallest first.
rank_spec = {
    'iFC': ('abs_iFC', False),
    'BF_21': ('BF', False),
    'FC_edgeR': ('abs_FC_edgeR', False),
    'p_edgeR': ('PValue', True),
    'FC_DESeq2': ('abs_FC_DESeq2', False),
    'p_DESeq2': ('pvalue', True),
}

# Sort RALL once per metric instead of once per comparison.
rankings = {label: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
            for label, (column, ascending) in rank_spec.items()}

# One RBO per labelled pair; driving the loop from RBO_index_L / RBO_index_R
# keeps the scores and their labels aligned by construction.
RBO_3 = [calc_rbo(rankings[left], rankings[right], p)
         for left, right in zip(RBO_index_L, RBO_index_R)]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_3, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.4

# Metric label -> (RALL column to rank by, ascending?).
# Bayes factor and absolute fold changes rank largest first, p-values smallest first.
rank_spec = {
    'iFC': ('abs_iFC', False),
    'BF_21': ('BF', False),
    'FC_edgeR': ('abs_FC_edgeR', False),
    'p_edgeR': ('PValue', True),
    'FC_DESeq2': ('abs_FC_DESeq2', False),
    'p_DESeq2': ('pvalue', True),
}

# Sort RALL once per metric instead of once per comparison.
rankings = {label: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
            for label, (column, ascending) in rank_spec.items()}

# One RBO per labelled pair; driving the loop from RBO_index_L / RBO_index_R
# keeps the scores and their labels aligned by construction.
RBO_4 = [calc_rbo(rankings[left], rankings[right], p)
         for left, right in zip(RBO_index_L, RBO_index_R)]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_4, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.5

# Metric label -> (RALL column to rank by, ascending?).
# Bayes factor and absolute fold changes rank largest first, p-values smallest first.
rank_spec = {
    'iFC': ('abs_iFC', False),
    'BF_21': ('BF', False),
    'FC_edgeR': ('abs_FC_edgeR', False),
    'p_edgeR': ('PValue', True),
    'FC_DESeq2': ('abs_FC_DESeq2', False),
    'p_DESeq2': ('pvalue', True),
}

# Sort RALL once per metric instead of once per comparison.
rankings = {label: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
            for label, (column, ascending) in rank_spec.items()}

# One RBO per labelled pair; driving the loop from RBO_index_L / RBO_index_R
# keeps the scores and their labels aligned by construction.
RBO_5 = [calc_rbo(rankings[left], rankings[right], p)
         for left, right in zip(RBO_index_L, RBO_index_R)]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_5, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.6

# Metric label -> (RALL column to rank by, ascending?).
# Bayes factor and absolute fold changes rank largest first, p-values smallest first.
rank_spec = {
    'iFC': ('abs_iFC', False),
    'BF_21': ('BF', False),
    'FC_edgeR': ('abs_FC_edgeR', False),
    'p_edgeR': ('PValue', True),
    'FC_DESeq2': ('abs_FC_DESeq2', False),
    'p_DESeq2': ('pvalue', True),
}

# Sort RALL once per metric instead of once per comparison.
rankings = {label: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
            for label, (column, ascending) in rank_spec.items()}

# One RBO per labelled pair; driving the loop from RBO_index_L / RBO_index_R
# keeps the scores and their labels aligned by construction.
RBO_6 = [calc_rbo(rankings[left], rankings[right], p)
         for left, right in zip(RBO_index_L, RBO_index_R)]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_6, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.7

# Metric label -> (RALL column to rank by, ascending?).
# Bayes factor and absolute fold changes rank largest first, p-values smallest first.
rank_spec = {
    'iFC': ('abs_iFC', False),
    'BF_21': ('BF', False),
    'FC_edgeR': ('abs_FC_edgeR', False),
    'p_edgeR': ('PValue', True),
    'FC_DESeq2': ('abs_FC_DESeq2', False),
    'p_DESeq2': ('pvalue', True),
}

# Sort RALL once per metric instead of once per comparison.
rankings = {label: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
            for label, (column, ascending) in rank_spec.items()}

# One RBO per labelled pair; driving the loop from RBO_index_L / RBO_index_R
# keeps the scores and their labels aligned by construction.
RBO_7 = [calc_rbo(rankings[left], rankings[right], p)
         for left, right in zip(RBO_index_L, RBO_index_R)]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_7, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.8

# Metric label -> (RALL column to rank by, ascending?).
# Bayes factor and absolute fold changes rank largest first, p-values smallest first.
rank_spec = {
    'iFC': ('abs_iFC', False),
    'BF_21': ('BF', False),
    'FC_edgeR': ('abs_FC_edgeR', False),
    'p_edgeR': ('PValue', True),
    'FC_DESeq2': ('abs_FC_DESeq2', False),
    'p_DESeq2': ('pvalue', True),
}

# Sort RALL once per metric instead of once per comparison.
rankings = {label: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
            for label, (column, ascending) in rank_spec.items()}

# One RBO per labelled pair; driving the loop from RBO_index_L / RBO_index_R
# keeps the scores and their labels aligned by construction.
RBO_8 = [calc_rbo(rankings[left], rankings[right], p)
         for left, right in zip(RBO_index_L, RBO_index_R)]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_8, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
p = 0.9

# Metric label -> (RALL column to rank by, ascending?).
# Bayes factor and absolute fold changes rank largest first, p-values smallest first.
rank_spec = {
    'iFC': ('abs_iFC', False),
    'BF_21': ('BF', False),
    'FC_edgeR': ('abs_FC_edgeR', False),
    'p_edgeR': ('PValue', True),
    'FC_DESeq2': ('abs_FC_DESeq2', False),
    'p_DESeq2': ('pvalue', True),
}

# Sort RALL once per metric instead of once per comparison.
rankings = {label: RALL.sort_values(by=column, ascending=ascending).locus_name.to_list()
            for label, (column, ascending) in rank_spec.items()}

# One RBO per labelled pair; driving the loop from RBO_index_L / RBO_index_R
# keeps the scores and their labels aligned by construction.
RBO_9 = [calc_rbo(rankings[left], rankings[right], p)
         for left, right in zip(RBO_index_L, RBO_index_R)]

RBO_data = pd.DataFrame({'<': RBO_index_L, f'{p}': RBO_9, '>': RBO_index_R})

RBO_data.sort_values(by=f'{p}')


In [None]:
# RANK BASED COMPARISON PLOT
# One column of markers per value of p; each marker is the RBO of one pair of rankings.

fig = plt.figure(figsize=(6, 6), dpi=300)

ax = fig.add_subplot(1, 1, 1)  # Create an axes object

ax.set_xlim(-0.1, 1.1)
ax.set_ylim(-0.1, 1.1)

# Enable grid
ax.grid(True, which='both', linestyle='--', linewidth=0.5)

# (p, RBO scores computed for that p) for every p evaluated in the cells above
rbo_by_p = (
    (0.1, RBO_1),
    (0.2, RBO_2),
    (0.3, RBO_3),
    (0.4, RBO_4),
    (0.5, RBO_5),
    (0.6, RBO_6),
    (0.7, RBO_7),
    (0.8, RBO_8),
    (0.9, RBO_9),
)

# the scatter plot: the x positions are sized from the score list itself,
# so the plot keeps working if the number of compared pairs changes
for p_value, rbo_scores in rbo_by_p:
    ax.scatter(np.full(len(rbo_scores), p_value), rbo_scores, c='r', s=35,
               alpha=0.8, marker='x')

axis_ticks = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
ax.set_xticks(axis_ticks)
ax.set_yticks(axis_ticks)

ax.set_xlabel('p')
ax.set_ylabel('RBO')

plt.show()  # Don't forget to show the plot


In [None]:
# Show RALL ordered from the largest Bayes factor downwards.
RALL.sort_values('BF', ascending=False)