In [None]:
import pandas as pd
import numpy as np
import matplotlib as m
import matplotlib.pyplot as plt

In [None]:
# Load the yeast count data; the first CSV column is used as the row index.

WT_yeast = pd.read_csv('WT_yeast.csv', index_col=0)      # 42 clean WT replicates
Snf2_yeast = pd.read_csv('Snf2_yeast.csv', index_col=0)  # 44 clean Snf2 mutant replicates

# Show both tables, WT first.
display(WT_yeast, Snf2_yeast)

In [None]:
# Load the q-values derived from the yeast count data
# (produced by explore_clean_yeast_consistency.ipynb); the first CSV column is the row index.

WT_yeast_q = pd.read_csv('WT_yeast_q.csv', index_col=0)      # 42 clean WT replicates
Snf2_yeast_q = pd.read_csv('Snf2_yeast_q.csv', index_col=0)  # 44 clean Snf2 mutant replicates

# Show both tables, WT first.
display(WT_yeast_q, Snf2_yeast_q)

In [None]:
# Gene and transcript lengths exported from YeastMine
# https://bluegenes.yeastgenome.org/yeastmine/results/ALL_Yeast_Genes (downloaded on 13-05-2024)

# YeastMine column header -> short name used in this notebook.
# 'gene_length' used to be ' gene_length' (stray leading space), which made the
# column unreachable as `df.gene_length` / `df['gene_length']`.
length_columns = {
    'Gene > Systematic Name': 'locus_name',
    'Gene > Length': 'gene_length',
    'Gene > Transcripts > Length': 'transcript_length',
}

lengths = (
    pd.read_csv('yeastmine_results_2024-05-13T13-30-37.csv', usecols=list(length_columns))
    .rename(columns=length_columns)
    # collapse to one row per locus_name by averaging the length columns
    # (presumably a locus appears once per transcript in the export; verify against the file)
    .groupby(['locus_name'], as_index=False)
    .mean()
)

display(lengths)

In [None]:
# Load the differential gene expression analysis results, attach an estimated
# q-value per gene, and join the length information.

# Per-row WT count total over every WT_yeast column except the first.
wt_gene_totals = WT_yeast.iloc[:, 1:].sum(axis=1)

RALL_bayexpress = (
    pd.read_csv('RALL_bayexpress.csv', index_col=0)
    # estimated q = (row total + 1) / (total over all rows + 2)
    # NOTE(review): the values are aligned to the results table by index label;
    # assumes WT_yeast and RALL_bayexpress.csv share the same index (TODO confirm).
    .assign(q=(wt_gene_totals + 1) / (sum(wt_gene_totals) + 2))
    # NOTE(review): how='right' keeps every locus present in `lengths`, so loci
    # without expression results appear as rows with missing values.
    .merge(lengths, how='right', on='locus_name')
)

display(RALL_bayexpress)

In [None]:
# How does transcript length relate to Bayes factors?
# The same scatter plot is drawn three times: linear axes, log10 y-axis only,
# and log10 on both axes.

for log_x, log_y in ((False, False), (False, True), (True, True)):
    fig, ax = plt.subplots(dpi=300)

    ax.grid()

    # axis scales are set before the points are drawn
    if log_x:
        ax.set_xscale("log", base=10)
    if log_y:
        ax.set_yscale("log", base=10)

    ax.scatter(
        RALL_bayexpress.transcript_length,
        RALL_bayexpress.BF,
        c='#332288',
        s=30,
        alpha=0.3,
        edgecolors='none',
    )

    ax.set_xlabel('transcript length')
    ax.set_ylabel('Bayes factor')

    plt.show()

In [None]:
# How does transcript length relate to estimated q-values?
# The same scatter plot is drawn three times: linear axes, log10 y-axis only,
# and log10 on both axes.

for log_x, log_y in ((False, False), (False, True), (True, True)):
    fig, ax = plt.subplots(dpi=300)

    ax.grid()

    # axis scales are set before the points are drawn
    if log_x:
        ax.set_xscale("log", base=10)
    if log_y:
        ax.set_yscale("log", base=10)

    ax.scatter(
        RALL_bayexpress.transcript_length,
        RALL_bayexpress.q,
        c='#332288',
        s=30,
        alpha=0.3,
        edgecolors='none',
    )

    ax.set_xlabel('transcript length')
    ax.set_ylabel('q')

    plt.show()

In [None]:
# How does gene length relate to Bayes factors?
# The scatter plot is drawn three times: linear axes, log10 y-axis only,
# and log10 on both axes.

# This cell previously plotted transcript_length; it now plots the gene length.
# The column is looked up by its whitespace-stripped name so that it is found
# whether it is stored as 'gene_length' or ' gene_length' (leading space).
gene_length = RALL_bayexpress.rename(columns=str.strip)['gene_length']

for log_x, log_y in ((False, False), (False, True), (True, True)):
    fig, ax = plt.subplots(dpi=300)

    ax.grid()

    # axis scales are set before the points are drawn
    if log_x:
        ax.set_xscale("log", base=10)
    if log_y:
        ax.set_yscale("log", base=10)

    ax.scatter(
        gene_length,
        RALL_bayexpress.BF,
        c='#332288',
        s=30,
        alpha=0.3,
        edgecolors='none',
    )

    # axis labels were missing from this cell
    ax.set_xlabel('gene length')
    ax.set_ylabel('Bayes factor')

    plt.show()