This notebook is to generate plots for data driven simulation with signals (Figure 4).

In [1]:
import pickle

import matplotlib.pyplot as plt
import pylab
import numpy as np
%matplotlib notebook

## DIBD Data Driven Simulations

### Varing filtering level (Figure 4a, c, e)

In [2]:
# input DIBD data driven simulation results with various filteirng level
with open("../results_all/simulation_DIBD_amnon_mixed_fl_s15.pkl", 'rb') as f:
    filtlev, B, c, nSample, num_nulls,FDR_bh1, FDR_fbh1, FDR_ds1, FDR_gb1,PWR_bh1, PWR_fbh1, PWR_ds1, PWR_gb1,OTU_bh1, OTU_fbh1, OTU_ds1, OTU_gb1,err_bh1, err_fbh1, err_ds1, err_gb1,sd_bh1, sd_fbh1, sd_ds1, sd_gb1 = pickle.load(f)

In [3]:
# plot post filtering hypotheses vs. filter level (Figure 4a)
fig_dibd_fl_nulls = plt.figure(figsize=(7, 5))
plt.plot(np.delete(filtlev, 0), np.delete(num_nulls, 0), linestyle='--', marker='o', color = 'purple', markeredgewidth=0.0)

plt.xlabel('Filter level', fontsize=20)
plt.ylabel('Post filtering hypotheses', fontsize=20)
plt.tick_params(labeltop='off', labelright='off')
plt.tick_params(labelsize=15)
plt.tight_layout()
fig_dibd_fl_nulls.savefig('../figures/dibd_simulation_signal_fl_nulls.pdf', dpi=300)

<IPython.core.display.Javascript object>

In [4]:
# plot FDR level vs. filter level (Figure 4c)
fig_dibd_fl_fdr = plt.figure(figsize=(7, 5))
plt.axhline(0.1, linestyle='--', color = 'green', markeredgewidth=0.0, label = 'nominal level')
plt.plot(filtlev, FDR_ds1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'red', label = 'DS')
plt.plot(filtlev, FDR_fbh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'orange', label = 'FBH')
plt.plot(filtlev, FDR_bh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'blue', label = 'BH')

pylab.legend(loc ='center right', frameon=False)
plt.xlabel('Filter level', fontsize = 20)
plt.ylabel('False discovery rate', fontsize = 20)
plt.tick_params(labeltop='off', labelright='off')
plt.tick_params(labelsize=15)
plt.tight_layout()
fig_dibd_fl_fdr.savefig('../figures/dibd_simulation_signal_fl_fdr.pdf', dpi=300)

<IPython.core.display.Javascript object>

In [5]:
# plot discovered OTU vs. filter level (Figure 4e)
fig_dibd_fl_otu = plt.figure(figsize=(7, 5))

plt.plot(filtlev, OTU_ds1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'red')
plt.plot(filtlev, OTU_fbh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'orange')
plt.plot(filtlev, OTU_bh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'blue')

plt.xlabel('Filter level', fontsize = 20)
plt.ylabel('OTUs discovered', fontsize = 20)
plt.tick_params(labeltop='off', labelright='off')
plt.tick_params(labelsize=15)
plt.tight_layout()
fig_dibd_fl_otu.savefig('../figures/dibd_simulation_signal_fl_otu.pdf', dpi=300)

<IPython.core.display.Javascript object>

## Varying sample size (Figure 4g, i)

In [6]:
# input DIBD data driven simulation results with various sample size
with open("../results_all/simulation_DIBD_amnon_mixed_s_fl10_10k.pkl", "rb") as f:
    filtlev, B, c, sample_range, FDR_bh1, FDR_fbh1, FDR_ds1, FDR_gb1,PWR_bh1, PWR_fbh1, PWR_ds1, PWR_gb1,OTU_bh1, OTU_fbh1, OTU_ds1, OTU_gb1,err_bh1, err_fbh1, err_ds1, err_gb1,sd_bh1, sd_fbh1, sd_ds1, sd_gb1 = pickle.load(f)

In [7]:
# plot FDR level vs. samples per group (Figure 4g)
fig_dibd_ds_fdr = plt.figure(figsize=(7, 5))
plt.axhline(0.1, linestyle='--', color = 'green', markeredgewidth=0.0)
plt.plot(sample_range, FDR_ds1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'red')
plt.plot(sample_range, FDR_fbh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'orange')
plt.plot(sample_range, FDR_bh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'blue')

plt.xlabel('Samples per group', fontsize = 20)
plt.ylabel('False discovery rate', fontsize = 20)
plt.tick_params(labeltop='off', labelright='off')
plt.tick_params(labelsize=15)
plt.tight_layout()
fig_dibd_ds_fdr.savefig('../figures/dibd_simulation_signal_ds_fdr.pdf', dpi=300)

<IPython.core.display.Javascript object>

In [8]:
# plot discovered OTU vs. samples per group (Figure 4i)
fig_dibd_ds_otu = plt.figure(figsize=(7, 5))
plt.plot(sample_range, OTU_ds1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'red')
plt.plot(sample_range, OTU_fbh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'orange')
plt.plot(sample_range, OTU_bh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'blue')

plt.xlabel('Samples per group', fontsize = 20)
plt.ylabel('OTUs discovered', fontsize = 20)
plt.tick_params(labeltop='off', labelright='off')
plt.tick_params(labelsize=15)
plt.tight_layout()
fig_dibd_ds_otu.savefig('../figures/dibd_simulation_signal_ds_otu.pdf', dpi=300)

<IPython.core.display.Javascript object>

### SCALE K VARIES (Supplementary table S2)

In [9]:
# input DIBD data driven simulation results with various scale K value
with open("../results_all/simulation_DIBD_amnon_mixed_k_B5k.pkl", "rb") as f:
    diff, filtlev, B, c, nSample,FDR_bh1, FDR_fbh1, FDR_ds1, FDR_gb1,PWR_bh1, PWR_fbh1, PWR_ds1, PWR_gb1, OTU_bh1, OTU_fbh1, OTU_ds1, OTU_gb1,err_bh1, err_fbh1, err_ds1, err_gb1,sd_bh1, sd_fbh1, sd_ds1, sd_gb1 = pickle.load(f)

In [10]:
print(diff)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]


In [11]:
print(np.around(FDR_bh1, decimals=4))

[ 0.0027  0.016   0.0194  0.0196  0.0202  0.0199  0.0203  0.02    0.0203
  0.0199  0.0202  0.0203  0.0203  0.02    0.0199  0.0206  0.0202  0.0201
  0.0207]


In [12]:
print(np.around(FDR_fbh1, decimals=4))

[ 0.0123  0.0288  0.0323  0.0325  0.034   0.0337  0.0346  0.0344  0.0348
  0.0346  0.0356  0.0361  0.0363  0.0363  0.0364  0.0371  0.0374  0.037
  0.038 ]


In [13]:
print(np.around(FDR_ds1, decimals=4))

[ 0.0171  0.0366  0.0384  0.0389  0.0409  0.0403  0.0412  0.0411  0.041
  0.041   0.0415  0.0419  0.0418  0.0414  0.0417  0.0422  0.0427  0.0418
  0.0427]


In [14]:
print(np.around(PWR_bh1, decimals=4))

[ 0.0017  0.1725  0.5217  0.6549  0.7017  0.725   0.741   0.7527  0.7616
  0.7693  0.818   0.846   0.8647  0.8808  0.8924  0.9004  0.9072  0.912
  0.9162]


In [15]:
print(np.around(PWR_fbh1, decimals=4))

[ 0.0059  0.2535  0.5766  0.6853  0.727   0.7482  0.7629  0.774   0.7815
  0.7885  0.8347  0.8606  0.8778  0.8925  0.9028  0.9101  0.916   0.9204
  0.9243]


In [16]:
print(np.around(PWR_ds1, decimals=4))

[ 0.0093  0.2868  0.5941  0.6944  0.734   0.7545  0.7688  0.7792  0.7865
  0.7934  0.8387  0.8638  0.8809  0.8952  0.9053  0.9122  0.918   0.9223
  0.9264]


## CS Data Driven Simulations (Figures 4b, d, f, h, j)

### Varying filtering level (Figure 4b, d, f )

In [17]:
# input CS data driven simulation results with various filteirng level
with open("../results_all/simulation_CS_amnon_mixed_fl_s15.pkl", "rb") as f:
    filtlev, B, c, nSample, num_nulls, FDR_bh1, FDR_fbh1, FDR_ds1, FDR_gb1, PWR_bh1, PWR_fbh1, PWR_ds1, PWR_gb1,OTU_bh1, OTU_fbh1, OTU_ds1, OTU_gb1,err_bh1, err_fbh1, err_ds1, err_gb1,sd_bh1, sd_fbh1, sd_ds1, sd_gb1 = pickle.load(f)

In [18]:
# plot post filtering hypotheses vs. filter level (Figure 4b)
fig_cs_fl_nulls = plt.figure(figsize=(7, 5))
plt.plot(np.delete(filtlev, 0), np.delete(num_nulls, 0), linestyle='--', marker='o', color = 'purple', markeredgewidth=0.0)
plt.xlabel('Filter level', fontsize=20)
plt.ylabel('Post filtering hypotheses', fontsize=20)

plt.tick_params(labeltop='off', labelright='off')
plt.tick_params(labelsize=15)
plt.tight_layout()
fig_cs_fl_nulls.savefig('../figures/cs_simulation_signal_fl_nulls.pdf', dpi=300)

<IPython.core.display.Javascript object>

In [19]:
# plot FDR level vs. filter level (Figure 4d)
fig_cs_fl_fdr = plt.figure(figsize=(7, 5))
plt.axhline(0.1, linestyle='--', color = 'green', markeredgewidth=0.0)
plt.plot(filtlev, FDR_ds1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'red')
plt.plot(filtlev, FDR_fbh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'orange')
plt.plot(filtlev, FDR_bh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'blue')

plt.xlabel('Filter level', fontsize = 20)
plt.ylabel('False discovery rate', fontsize = 20)
plt.tick_params(labeltop='off', labelright='off')
plt.tick_params(labelsize=15)
plt.tight_layout()
fig_cs_fl_fdr.savefig('../figures/cs_simulation_signal_fl_fdr.pdf', dpi=300)

<IPython.core.display.Javascript object>

In [20]:
# plot discovered OTU vs. filter level (Figure 4f)
fig_cs_fl_otu = plt.figure(figsize=(7, 5))
plt.plot(filtlev, OTU_ds1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'red')
plt.plot(filtlev, OTU_fbh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'orange')
plt.plot(filtlev, OTU_bh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'blue')

plt.xlabel('Filter level', fontsize = 20)
plt.ylabel('OTUs discovered', fontsize = 20)
plt.tick_params(labeltop='off', labelright='off')
plt.tick_params(labelsize=15)
plt.tight_layout()
fig_cs_fl_otu.savefig('../figures/cs_simulation_signal_fl_otu.pdf', dpi=300)

<IPython.core.display.Javascript object>

### Varing sample size (Figure 4h, j)

In [21]:
# input CS data driven simulation results with various sample size
with open("../results_all/simulation_CS_amnon_mixed_s_fl10.pkl", "rb") as f:
    filtlev, B, c, sample_range, FDR_bh1, FDR_fbh1, FDR_ds1, FDR_gb1,PWR_bh1, PWR_fbh1, PWR_ds1, PWR_gb1,OTU_bh1, OTU_fbh1, OTU_ds1, OTU_gb1,err_bh1, err_fbh1, err_ds1, err_gb1,sd_bh1, sd_fbh1, sd_ds1, sd_gb1= pickle.load(f)

In [22]:
# plot FDR level vs. samples per group (Figure 4h)
fig_cs_ds_fdr = plt.figure(figsize=(7, 5))
plt.axhline(0.1, linestyle='--', color = 'green', markeredgewidth=0.0)
plt.plot(sample_range, FDR_ds1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'red')
plt.plot(sample_range, FDR_fbh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'orange')
plt.plot(sample_range, FDR_bh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'blue')

plt.xlabel('Samples per group', fontsize = 20)
plt.ylabel('False discovery rate', fontsize = 20)
plt.tick_params(labeltop='off', labelright='off')
plt.tick_params(labelsize=15)
plt.tight_layout()
fig_cs_ds_fdr.savefig('../figures/cs_simulation_signal_ds_fdr.pdf', dpi=300)

<IPython.core.display.Javascript object>

In [23]:
# plot discovered OTU vs. samples per group (Figure 4j)
fig_cs_ds_otu = plt.figure(figsize=(7, 5))
plt.plot(sample_range, OTU_ds1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'red')
plt.plot(sample_range, OTU_fbh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'orange')
plt.plot(sample_range, OTU_bh1, linestyle='--', marker='o', markeredgewidth=0.0, color = 'blue')

plt.xlabel('Samples per group', fontsize = 20)
plt.ylabel('OTUs discovered', fontsize = 20)
plt.tick_params(labeltop='off', labelright='off')
plt.tick_params(labelsize=15)
plt.tight_layout()
fig_cs_ds_otu.savefig('../figures/cs_simulation_signal_ds_otu.pdf', dpi=300)

<IPython.core.display.Javascript object>

### SCALE K VARIES (Supplementary table S3)

In [24]:
# input CS data driven simulation results with various scale K value
with open("../results_all/simulation_cs_amnon_mixed_k_B5k.pkl", "rb") as f:
    diff, filtlev, B, c, nSample,FDR_bh1, FDR_fbh1, FDR_ds1, FDR_gb1,PWR_bh1, PWR_fbh1, PWR_ds1, PWR_gb1, OTU_bh1, OTU_fbh1, OTU_ds1, OTU_gb1,err_bh1, err_fbh1, err_ds1, err_gb1,sd_bh1, sd_fbh1, sd_ds1, sd_gb1 = pickle.load(f)

In [25]:
print(diff)
print(B, nSample, filtlev, c)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
5000 15 10 0.1


In [26]:
print(np.nanmax(err_bh1), np.nanmax(err_fbh1), np.nanmax(err_ds1))

0.000158063235052 0.000321534832168 0.0006219136403


In [27]:
print(np.around(FDR_bh1, decimals=4))

[ 0.0002  0.014   0.0163  0.0164  0.0165  0.0164  0.0165  0.0166  0.0165
  0.0167  0.0167  0.0166  0.0166  0.0166  0.0166  0.0165  0.0167  0.0166
  0.0167]


In [28]:
print(np.around(FDR_fbh1, decimals=4))

[ 0.0038  0.0255  0.0315  0.032   0.0319  0.0319  0.0321  0.0322  0.032
  0.0322  0.0321  0.0319  0.0319  0.032   0.032   0.0318  0.0322  0.0318
  0.0319]


In [29]:
print(np.around(FDR_ds1, decimals=4))

[ 0.0136  0.0316  0.0364  0.036   0.0359  0.0359  0.0361  0.0361  0.0359
  0.0361  0.036   0.0358  0.0357  0.0359  0.0358  0.0358  0.0361  0.0357
  0.0359]


In [30]:
print(np.around(PWR_bh1, decimals=4))

[  5.00000000e-04   3.91900000e-01   8.12700000e-01   9.16600000e-01
   9.40300000e-01   9.49300000e-01   9.54500000e-01   9.57800000e-01
   9.60600000e-01   9.62600000e-01   9.73400000e-01   9.78700000e-01
   9.82400000e-01   9.85100000e-01   9.87000000e-01   9.88400000e-01
   9.89500000e-01   9.90300000e-01   9.91200000e-01]


In [31]:
print(np.around(PWR_fbh1, decimals=4))

[ 0.0054  0.5006  0.8553  0.9311  0.9491  0.9562  0.9605  0.9632  0.9656
  0.9675  0.9769  0.9817  0.9849  0.9872  0.9889  0.9901  0.9911  0.9919
  0.9926]


In [32]:
print(np.around(PWR_ds1, decimals=4))

[ 0.0166  0.5414  0.8657  0.9348  0.9514  0.9581  0.9622  0.9647  0.967
  0.9689  0.9779  0.9825  0.9856  0.9878  0.9894  0.9906  0.9916  0.9923
  0.993 ]
