In [None]:
import matplotlib as m
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import wasserstein_distance
from sklearn.neighbors import KernelDensity

In [None]:
# Raw yeast read counts, one CSV per condition
# (the first CSV column is used as the row index).
#   WT_yeast   - 42 clean WT replicates
#   Snf2_yeast - 44 clean Snf2 mutant replicates
WT_yeast = pd.read_csv('WT_yeast.csv', index_col=0)
Snf2_yeast = pd.read_csv('Snf2_yeast.csv', index_col=0)

display(WT_yeast, Snf2_yeast)

In [None]:
# q-values derived from the yeast count data, as written out by
# explore_clean_yeast_consistency.ipynb
# (the first CSV column is used as the row index).
#   WT_yeast_q   - 42 clean WT replicates
#   Snf2_yeast_q - 44 clean Snf2 mutant replicates
WT_yeast_q = pd.read_csv('WT_yeast_q.csv', index_col=0)
Snf2_yeast_q = pd.read_csv('Snf2_yeast_q.csv', index_col=0)

display(WT_yeast_q, Snf2_yeast_q)

In [None]:
# Average q-values derived from the yeast count data, as written out by
# explore_clean_yeast_consistency.ipynb
# (the first CSV column is used as the row index).
#   WT_yeast_avq   - 42 clean WT replicates
#   Snf2_yeast_avq - 44 clean Snf2 mutant replicates
WT_yeast_avq = pd.read_csv('WT_yeast_avq.csv', index_col=0)
Snf2_yeast_avq = pd.read_csv('Snf2_yeast_avq.csv', index_col=0)

display(WT_yeast_avq, Snf2_yeast_avq)

In [None]:
# Results of the differential gene expression analysis.
RALL_bayexpress = pd.read_csv('RALL_bayexpress.csv', index_col=0)

# Column 'W': Wasserstein distance between the WT row and the Snf2 row found
# at the same row position, for every row position of RALL_bayexpress.
# NOTE(review): the WT row is sliced from column 1 onwards but the Snf2 row
# from column 3 onwards - confirm both offsets against the CSV layouts.
RALL_bayexpress['W'] = [
    wasserstein_distance(
        list(WT_yeast.iloc[pos, 1:]),
        list(Snf2_yeast.iloc[pos, 3:]),
    )
    for pos in range(len(RALL_bayexpress))
]

display(RALL_bayexpress)

In [None]:
# Bootstrapping results, produced via package_comp_numbers.ipynb.
# Every table is re-indexed by its gene identifier column:
# 'locus_name' for the two bayexpress tables, 'genes' for edgeR and DESeq2.
bayexpress_BFNtrue = pd.read_csv('bayexpress_BFNtrue.csv').set_index('locus_name')
bayexpress_Ntrue = pd.read_csv('bayexpress_Ntrue.csv').set_index('locus_name')
edgeR_Ntrue = pd.read_csv('edgeR_Ntrue.csv').set_index('genes')
DESeq2_Ntrue = pd.read_csv('DESeq2_Ntrue.csv').set_index('genes')

display(bayexpress_BFNtrue, bayexpress_Ntrue, edgeR_Ntrue, DESeq2_Ntrue)

In [None]:
# Per-gene report used throughout this notebook:
#   * the raw count rows and the bayexpress result rows of the requested genes
#   * one q-value figure per gene (consistency across replicates)
#   * one figure with four package comparison number (PCN) panels built from
#     the bootstrapping tables (original note: 100 bootstrapping iterations)

# Colours used for the two conditions in the q-value figures.
_WT_COLOUR = '#332288'
_SNF2_COLOUR = '#DDCC77'


def _gene_positions(genes):
    """Return the row position in ``RALL_bayexpress`` for every name in ``genes``.

    The first occurrence of a locus name wins (same result as ``list.index``),
    but the ``locus_name`` column is scanned only once.

    Raises:
        ValueError: if any requested name is absent from
            ``RALL_bayexpress.locus_name``.
    """
    first_position = {}
    for position, name in enumerate(RALL_bayexpress.locus_name):
        first_position.setdefault(name, position)

    missing = [gene for gene in genes if gene not in first_position]
    if missing:
        raise ValueError(f'genes not found in RALL_bayexpress.locus_name: {missing}')
    return [first_position[gene] for gene in genes]


def _plot_q_values(position):
    """Draw and show the q-value figure for the gene at row ``position``.

    ``position`` is a row position (not an index label); it is applied with
    ``.iloc`` to ``RALL_bayexpress`` and to the q / average-q tables alike, so
    the title, the legend and the plotted data always refer to the same row.
    """
    summary = RALL_bayexpress.iloc[position]

    # (q table, average-q table, nBF column, AOTP column, colour) per condition
    conditions = (
        (WT_yeast_q, WT_yeast_avq, 'nBF_WT', 'AOTP_WT', _WT_COLOUR),
        (Snf2_yeast_q, Snf2_yeast_avq, 'nBF_Snf2', 'AOTP_Snf2', _SNF2_COLOUR),
    )

    fig, ax = plt.subplots(dpi=300)

    # horizontal step histograms of the q-values of each condition
    for q_table, _, _, _, colour in conditions:
        ax.hist(q_table.iloc[position], 10, density=False, histtype='step',
                color=colour, alpha=0.4, orientation='horizontal')

    # one point per column of the q table; x runs from 1 to the column count
    for q_table, _, nbf_column, aotp_column, colour in conditions:
        nbf = str(np.round(summary[nbf_column], 3))
        marker = '*' if summary[aotp_column] else ''
        ax.scatter(np.arange(1, len(q_table.columns) + 1), q_table.iloc[position],
                   c=colour, s=30, label=f'BF = {nbf}{marker}',
                   alpha=0.9, edgecolors='none')

    # the legend is built before the average lines are drawn, so it lists the
    # scatter entries only
    ax.legend(loc='upper right')

    # average estimates, drawn as wide translucent lines on the same axes
    for _, avq_table, _, _, colour in conditions:
        avq_table.iloc[position, 1:].plot(ax=ax, color=colour, alpha=0.3, linewidth=10)

    gene = str(summary['locus_name'])
    gene_BF = str(np.round(summary['BF'], 3))
    gene_iFC = str(np.round(summary['FC'], 3))
    gene_W = str(np.round(summary['W'], 3))
    ax.set_title(f'{gene}: BF = {gene_BF}, iFC = {gene_iFC}, W = {gene_W}')

    # one unlabelled tick per replicate position; the count follows the wider
    # of the two q tables instead of a hard-coded number
    n_positions = max(len(WT_yeast_q.columns), len(Snf2_yeast_q.columns))
    ax.set_xticks(np.arange(1, n_positions + 1))
    ax.set_xticklabels([])

    # plain y tick labels (no offset, no scientific notation)
    ax.ticklabel_format(axis='y', style='plain', useOffset=False)

    ax.set_xlabel('Replicates')
    ax.set_ylabel('q = (n+1) / (N+2)')

    plt.show()
    plt.close(fig)  # release the figure when many genes are plotted


def _plot_ntrue_panel(ax, table, genes, reps, title, label_genes=False):
    """Draw one annotated heat-map panel of ``table.loc[genes]`` on ``ax``.

    Args:
        ax: matplotlib axes to draw on.
        table: DataFrame indexed by gene name; its first ``len(reps)`` columns
            are annotated.
        genes: list of gene names (row labels of ``table``).
        reps: x tick labels, one per annotated column.
        title: panel title.
        label_genes: write the gene names on the y axis when True, otherwise
            leave the y axis without ticks.
    """
    values = table.loc[genes].values  # looked up once, not once per cell

    ax.imshow(values, cmap='cividis', alpha=0.3,
              norm=m.colors.Normalize(vmin=0, vmax=100))

    ax.set_xticks(np.arange(len(reps)), labels=reps)
    if label_genes:
        ax.set_yticks(np.arange(len(genes)), labels=genes)
    else:
        ax.set_yticks([], labels=[])

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Write every value into its cell.
    for i in range(len(genes)):
        for j in range(len(reps)):
            ax.text(j, i, values[i, j], ha="center", va="center", color="black")

    ax.set_title(title)


def stalk(genes):
    """Print, display and plot everything this notebook knows about ``genes``.

    Output, in order: the WT and Snf2 count rows, the bayexpress result rows,
    a legend of the result columns, one q-value figure per gene, and one
    figure with four panels (bayexpress_Ntrue, edgeR_Ntrue, DESeq2_Ntrue,
    bayexpress_BFNtrue) annotated with the table values of the genes.

    Args:
        genes: locus names as found in ``RALL_bayexpress.locus_name``. A list
            as before; any other iterable of names or a single name string is
            accepted too.

    Raises:
        ValueError: if a name is not present in ``RALL_bayexpress.locus_name``.
        KeyError: if a name is missing from one of the bootstrapping tables.
    """
    # normalise to a list: a tuple passed to .loc would be read as a
    # multi-axis key, and a bare string would be iterated character by character
    genes = [genes] if isinstance(genes, str) else list(genes)

    # Find the row position of all genes
    igenes = _gene_positions(genes)

    # Some more numbers about the genes

    print('Read counts in WT across replicates:')
    display(WT_yeast.iloc[igenes])

    print('Read counts in Snf2 mutant across replicates:')
    display(Snf2_yeast.iloc[igenes])

    print('bayexpress results:')
    display(RALL_bayexpress.iloc[igenes])

    print('With:')
    print('BF ... Bayes factor for differential gene expression analysis')
    print('FC ... inferred log_2 fold change')
    print('nBF_WT ... Bayes factor for testing wether replicates are consistent with each other in wild-type')
    print('AOTP_WT ... genes that have been identified to be All Over The Place in WT meaning it is part of the list of highy variable genes idenified in bootstrapping experiments. If AOTP == True, the gene is marked*.')
    print('nBF_Snf2 ... Bayes factor for testing wether replicates are consistent with each other in Snf2-mutant')
    print('AOTP_Snf2 ... genes that have been identified to be All Over The Place in the mutant meaning it is part of the list of highy variable genes idenified in bootstrapping experiments. If AOTP == True, the gene is marked*.')

    # printing q-plots
    for position in igenes:
        _plot_q_values(position)

    # package number comparison plots
    reps = ['3/3 R', '6/6 R', '12/12 R', '20/20 R']

    fig, axes = plt.subplots(1, 4, dpi=300, figsize=(8, 3))

    panels = (
        (bayexpress_Ntrue, 'BF'),        # Bayes factors
        (edgeR_Ntrue, 'edgeR'),
        (DESeq2_Ntrue, 'DESeq2'),
        (bayexpress_BFNtrue, 'BF > 1'),  # Bayes factors only
    )
    for column, (ax, (table, title)) in enumerate(zip(axes, panels)):
        # only the left-most panel carries the gene names on its y axis
        _plot_ntrue_panel(ax, table, genes, reps, title, label_genes=(column == 0))

    plt.show()
    plt.close(fig)

In [None]:
# Report for four loci in one call.
stalk([
    'YAL016C-B',
    'YAL031W-A',
    'YAR035W',
    'YAR068W',
])

In [None]:
# Report for four loci in one call.
stalk([
    'YGL228W',
    'YBR078W',
    'YIL094C',
    'YGL253W',
])

In [None]:
# Report for four loci in one call.
stalk([
    'YGR192C',
    'YOR383C',
    'YHR174W',
    'YDR077W',
])

In [None]:
# Report for four loci in one call.
stalk([
    'YNL232W',
    'YLR329W',
    'YDR291W',
    'YPR164W',
])

In [None]:
# Report for four loci in one call.
stalk([
    'snR32',
    'YPL032C',
    'YNL034W',
    'YPL030W',
])

In [None]:
# Scratch check of scikit-learn's KernelDensity: fit a Gaussian kernel
# (bandwidth 0.2) on six hard-coded 2-D points and score those same points.
# Nothing here reads the yeast tables loaded earlier in this notebook.
# KernelDensity and numpy come from the import cell at the top of the notebook.
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(X)
kde.score_samples(X)

In [None]:
# Single-gene report; YHR174W is also part of an earlier four-gene stalk() call in this notebook.
stalk(['YHR174W'])

In [None]:
# Single-gene report for YDL062W.
stalk(['YDL062W'])