In [None]:
import pandas as pd
import numpy as np
import matplotlib as m
import matplotlib.pyplot as plt

In [None]:
# Raw yeast read counts, one CSV per condition (first CSV column is the index):
#   WT_yeast   -> 42 clean wild-type replicates
#   Snf2_yeast -> 44 clean Snf2 mutant replicates
WT_yeast, Snf2_yeast = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ('WT_yeast.csv', 'Snf2_yeast.csv')
)

display(WT_yeast, Snf2_yeast)

In [None]:
# q-values derived from the yeast count data
# (produced by explore_clean_yeast_consistency.ipynb):
#   WT_yeast_q   -> 42 clean wild-type replicates
#   Snf2_yeast_q -> 44 clean Snf2 mutant replicates
WT_yeast_q, Snf2_yeast_q = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ('WT_yeast_q.csv', 'Snf2_yeast_q.csv')
)

display(WT_yeast_q, Snf2_yeast_q)

In [None]:
# Average q-values derived from the yeast count data
# (produced by explore_clean_yeast_consistency.ipynb):
#   WT_yeast_avq   -> 42 clean wild-type replicates
#   Snf2_yeast_avq -> 44 clean Snf2 mutant replicates
WT_yeast_avq, Snf2_yeast_avq = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ('WT_yeast_avq.csv', 'Snf2_yeast_avq.csv')
)

display(WT_yeast_avq, Snf2_yeast_avq)

In [None]:
# Results of the differential gene expression analysis;
# the first CSV column becomes the frame's index.
RALL_bayexpress = pd.read_csv(
    filepath_or_buffer='RALL_bayexpress.csv',
    index_col=0,
)

display(RALL_bayexpress)

In [None]:
# Bootstrapping results (produced via package_comp_numbers.ipynb).
# Each table is re-indexed by its gene identifier column: 'locus_name' for the
# bayexpress tables, 'genes' for the edgeR and DESeq2 tables.
bayexpress_BFNtrue, bayexpress_Ntrue, edgeR_Ntrue, DESeq2_Ntrue = (
    pd.read_csv(csv_name).set_index(id_column)
    for csv_name, id_column in (
        ('bayexpress_BFNtrue.csv', 'locus_name'),
        ('bayexpress_Ntrue.csv', 'locus_name'),
        ('edgeR_Ntrue.csv', 'genes'),
        ('DESeq2_Ntrue.csv', 'genes'),
    )
)

display(bayexpress_BFNtrue, bayexpress_Ntrue, edgeR_Ntrue, DESeq2_Ntrue)

In [None]:
def q_plot(genes):
    """Display count tables, bayexpress results and one q-plot per gene.

    For the requested genes this shows the matching rows of ``WT_yeast``,
    ``Snf2_yeast`` and ``RALL_bayexpress``, prints a legend of the result
    columns, and then draws one figure per gene containing, for both
    conditions: a horizontal step histogram of the per-replicate q-values,
    a scatter of the q-values against the replicate number, and a thick line
    of the averaged q-values.

    Parameters
    ----------
    genes : iterable of str
        Locus names as they appear in ``RALL_bayexpress.locus_name``.

    Raises
    ------
    ValueError
        If any requested gene is not present in ``RALL_bayexpress.locus_name``.

    Notes
    -----
    Genes are resolved to row *positions* in ``RALL_bayexpress`` and every
    frame is then indexed positionally with ``.iloc``.
    NOTE(review): this assumes all module-level frames share the row order of
    ``RALL_bayexpress`` -- confirm against the notebooks that produce the CSVs.
    """
    genes = list(genes)

    # Map each locus name to the position of its first occurrence, built once
    # instead of converting the column to a list for every requested gene.
    first_position = {}
    for position, locus in enumerate(RALL_bayexpress.locus_name):
        first_position.setdefault(locus, position)

    missing = [gene for gene in genes if gene not in first_position]
    if missing:
        raise ValueError(
            f'Genes not found in RALL_bayexpress.locus_name: {missing}')

    igenes = [first_position[gene] for gene in genes]

    # Some more numbers about the genes
    print('Read counts in WT across replicates:')
    display(WT_yeast.iloc[igenes])

    print('Read counts in Snf2 mutant across replicates:')
    display(Snf2_yeast.iloc[igenes])

    print('bayexpress results:')
    display(RALL_bayexpress.iloc[igenes])

    print('With:')
    print('BF ... Bayes factor for differential gene expression analysis')
    print('FC ... inferred log_2 fold change')
    print('nBF_WT ... Bayes factor for testing wether replicates are consistent with each other in wild-type')
    print('AOTP_WT ... genes that have been identified to be All Over The Place in WT meaning it is part of the list of highy variable genes idenified in bootstrapping experiments. If AOTP == True, the gene is marked*.')
    print('nBF_Snf2 ... Bayes factor for testing wether replicates are consistent with each other in Snf2-mutant')
    print('AOTP_Snf2 ... genes that have been identified to be All Over The Place in the mutant meaning it is part of the list of highy variable genes idenified in bootstrapping experiments. If AOTP == True, the gene is marked*.')

    # Loop-invariant replicate counts; the x ticks must cover the larger
    # of the two conditions rather than a hard-coded number.
    n_wt = len(WT_yeast_q.columns)
    n_snf2 = len(Snf2_yeast_q.columns)
    n_ticks = max(n_wt, n_snf2)

    # printing q-plots
    for t in igenes:
        # `t` is a row position, so use .iloc (not .loc) to fetch the results.
        result = RALL_bayexpress.iloc[t]

        fig, ax = plt.subplots(dpi=300)

        # distribution of the q-values per condition
        ax.hist(WT_yeast_q.iloc[t], 10, density=False, histtype='step',
                color='#332288', alpha=0.4, orientation='horizontal')
        ax.hist(Snf2_yeast_q.iloc[t], 10, density=False, histtype='step',
                color='#DDCC77', alpha=0.4, orientation='horizontal')

        # scatter WT
        gene_WT_nBF = str(np.round(result['nBF_WT'], 3))
        ax.scatter(np.arange(1, n_wt + 1), WT_yeast_q.iloc[t],
                   c='#332288', s=30,
                   label=f'BF_IC = {gene_WT_nBF}',
                   alpha=0.9, edgecolors='none')

        # scatter Snf2
        gene_Snf2_nBF = str(np.round(result['nBF_Snf2'], 3))
        ax.scatter(np.arange(1, n_snf2 + 1), Snf2_yeast_q.iloc[t],
                   c='#DDCC77', s=30,
                   label=f'BF_IC = {gene_Snf2_nBF}',
                   alpha=0.9, edgecolors='none')

        # The legend is created before the average lines are drawn so that it
        # only lists the two scatter series.
        ax.legend(loc='upper right')

        # average estimates (first column skipped), drawn on this figure's axes
        # NOTE(review): the scatter uses 1-based x positions; verify that the
        # x positions pandas assigns to these series line up with them.
        WT_yeast_avq.iloc[t, 1:].plot(ax=ax, color='#332288', alpha=0.3,
                                      linewidth=10)
        Snf2_yeast_avq.iloc[t, 1:].plot(ax=ax, color='#DDCC77', alpha=0.3,
                                        linewidth=10)

        gene = str(result['locus_name'])
        gene_BF = str(np.round(result['BF'], 3))
        gene_iFC = str(np.round(result['FC'], 3))
        ax.set_title(f'{gene}:      BF_21 = {gene_BF}, iFC = {gene_iFC}')

        # one unlabeled tick per replicate
        ax.set_xticks(np.arange(1, n_ticks + 1))
        ax.set_xticklabels([])

        # formatting y axis ticks
        ax.ticklabel_format(axis='y', style='plain', useOffset=False)

        ax.set_xlabel('Replicates')
        ax.set_ylabel('q = (n+1) / (N+2)')

        plt.show()
        # release the high-dpi figure so looping over many genes stays cheap
        plt.close(fig)

In [None]:
# Let's find an example gene for every group in the Venn diagram
# In collaboration with package_comparison_RALL.ipynb

# DEG = 'statistically significant change' & | log2 fold change | > 1

# Which ones are positive in bayexpress but not the other 2?
# 138 genes
# YAL016C-B

# Which ones are positive in edgeR but not the other 2?
# 0 genes
# -

# Which ones are positive in DESeq2 but not the other 2?
# 50 genes
# YDL062W

# Which ones are positive in DESeq2 and edgeR but not bayexpress?
# 98 genes
# RPR1

# Which ones are positive in DESeq2 and bayexpress but not edgeR?
# 52 genes
# YAL063C-A

# Which ones are positive in edgeR and bayexpress but not DESeq2?
# 1 gene
# YGR161W-C

# Which ones are positive hits in all 3?
# 342 genes
# YAL061W

# Which ones are not DEG in all 3?
# 6445 genes
# 15S_rRNA

In [None]:
# One example gene per Venn-diagram group
q_plot([
    'YAL016C-B',  # bayexpress only
    'YDL062W',    # DESeq2 only
    'RPR1',       # DESeq2 and edgeR, not bayexpress
    'YAL063C-A',  # DESeq2 and bayexpress, not edgeR
    'YGR161W-C',  # edgeR and bayexpress, not DESeq2
    'YAL061W',    # positive in all three
    '15S_rRNA',   # not DEG in any of the three
])