In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

from pathlib import Path
import pandas as pd
import numpy as np
import math

import seaborn as sns

from downstream.signals.signal_r2_permutation_test import collect_paths
from downstream.signals.signal_pca_fit_error_pvalue_permutation_test import process

This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.



In [5]:
# Root directory of the signal experiments: it is handed to collect_paths()
# and the "validate.<normalization>.pvalue.<simulations>.csv" result files
# are looked up / created directly under it.
signals_root = Path("/mnt/stripe/bio/experiments/signal_experiments")

Selected Paths:  1582 of 19932


In [37]:
def pvalue_for(normalization, simulations=100001, threads=8):
    """Return the path of the p-value CSV for ``normalization``, computing it if missing.

    The result file lives directly under ``signals_root`` and is named
    ``validate.<normalization>.pvalue.<simulations>.csv``. When that file
    already exists it is reused as-is; otherwise the permutation test is run
    via ``process`` over every collected path whose file name contains
    ``_<normalization>.tsv``.

    Parameters
    ----------
    normalization : str
        Normalization tag, used both in the input file-name filter and in the
        output file name.
    simulations : int, optional
        Number of simulations forwarded to ``process``; it is part of the
        output file name, so each value gets its own cached result file.
        A small value is handy for a quick trial run.
    threads : int, optional
        Number of threads forwarded to ``process``.

    Returns
    -------
    pathlib.Path
        Location of the result CSV. NOTE(review): the file is presumably
        written by ``process``; this function does not verify that it exists
        after the call.
    """
    output_path = signals_root / "validate.{}.pvalue.{}.csv".format(normalization, simulations)
    already_computed = output_path.exists()
    print("Results file:", str(output_path), "[exists]" if already_computed else "[not exists]")

    if not already_computed:
        print("  calculate:", str(output_path.name))

        # Collect files:
        paths_filter = "_{}.tsv".format(normalization)
        all_paths = collect_paths(signals_root)
        paths = [p for p in all_paths if paths_filter in p.name]
        print("Selected Paths: ", len(paths), "of", len(all_paths))

        # Calc pvalues:
        # The seed is deliberately not a parameter: the cache file name does
        # not encode it, so varying it would silently reuse stale results.
        process(paths, str(output_path), seed=100, simulations=simulations, threads=threads, fdr=True)

    return output_path

In [68]:
def fdr_control(normalization, fdr=0.05):
    """Load the p-value table for ``normalization`` and keep rows below ``fdr``.

    The table is the CSV returned by ``pvalue_for(normalization)``. A ``loci``
    column holding the final path component of each ``file`` entry is added.
    As a diagnostic, the first row (in file order) whose ``pvalue_corr`` is
    greater than or equal to ``fdr`` is printed as ``(column, value)`` pairs,
    one per line; if there is no such row a short notice is printed instead.

    Parameters
    ----------
    normalization : str
        Normalization tag forwarded to ``pvalue_for``.
    fdr : float, optional
        Threshold applied to the ``pvalue_corr`` column (presumably the
        FDR-corrected p-value -- confirm against ``process``).

    Returns
    -------
    pandas.DataFrame
        Rows with ``pvalue_corr < fdr``, including the added ``loci`` column.
    """
    df_path = pvalue_for(normalization)
    # DataFrame.from_csv was deprecated and later removed from pandas;
    # read_csv with its default index handling matches from_csv(index_col=None).
    df = pd.read_csv(df_path)
    df["loci"] = [Path(f).name for f in df["file"]]

    print("First pvalue failed FDR control:")
    failing = df[df["pvalue_corr"] >= fdr]
    if failing.empty:
        # Previously a bare next(...) raised StopIteration in this case.
        print("  none: every pvalue_corr is below", fdr)
    else:
        print(*zip(df.columns, failing.iloc[0]), sep="\n")

    fdf = df[df["pvalue_corr"] < fdr]
    print("\nPasses FDR control:", len(fdf))
    return fdf

In [69]:
# Rows passing FDR control (default threshold 0.05) for the "rawq" normalization.
fdr_control("rawq")

Results file: /mnt/stripe/bio/experiments/signal_experiments/validate.rawq.pvalue.100001.csv [exists]
First pvalue failed FDR control:
('modification', 'meth')
('file', '/mnt/stripe/bio/experiments/signal_experiments/meth/cuffdiff_g_transcript')
('normalization', 'rawq')
('pvalue', 0.0009699806003879922)
('pvalue_corr', 0.13150165568117209)
('loci', 'cuffdiff_g_transcript')

Passes FDR control: 12


Unnamed: 0,modification,file,normalization,pvalue,pvalue_corr,loci
0,H3K27ac,/mnt/stripe/bio/experiments/signal_experiments...,rawq,1e-05,0.001582,cpg_minavcov10_complex_4outliers.narrow.adjust...
1,H3K27ac,/mnt/stripe/bio/experiments/signal_experiments...,rawq,1e-05,0.001582,diffReps_H3K27ac_both
2,H3K27ac,/mnt/stripe/bio/experiments/signal_experiments...,rawq,1e-05,0.001582,diffReps_broad_H3K27ac_both
3,H3K27ac,/mnt/stripe/bio/experiments/signal_experiments...,rawq,1e-05,0.001582,conservation_regions_0.7_0.8
4,H3K27me3,/mnt/stripe/bio/experiments/signal_experiments...,rawq,1e-05,0.001582,diffReps_H3K27me3_both
5,H3K27me3,/mnt/stripe/bio/experiments/signal_experiments...,rawq,1e-05,0.001582,diffReps_broad_input_H3K27me3_both
6,H3K27me3,/mnt/stripe/bio/experiments/signal_experiments...,rawq,1e-05,0.001582,diff_OD_YD_H3K27me3_zinbra_200_0.01_10_both
7,H3K4me1,/mnt/stripe/bio/experiments/signal_experiments...,rawq,1e-05,0.001582,diff_OD_YD_H3K4me1_zinbra_200_1.0E-4_10_both
8,H3K4me3,/mnt/stripe/bio/experiments/signal_experiments...,rawq,1e-05,0.001582,diff_OD_YD_H3K4me3_zinbra_200_0.05_5_both
9,H3K4me3,/mnt/stripe/bio/experiments/signal_experiments...,rawq,1e-05,0.001582,conservation_regions_0.6_0.7


In [70]:
# Rows passing FDR control (default threshold 0.05) for the "fripz" normalization.
fdr_control("fripz")

Results file: /mnt/stripe/bio/experiments/signal_experiments/validate.fripz.pvalue.100001.csv [exists]
First pvalue failed FDR control:
('modification', 'H3K4me1')
('file', '/mnt/stripe/bio/experiments/signal_experiments/H3K4me1/cpg_minavcov10_complex_4outliers.narrow.adjusted.regions.filtered')
('normalization', 'fripz')
('pvalue', 0.00032999340013199736)
('pvalue_corr', 0.065256194876102472)
('loci', 'cpg_minavcov10_complex_4outliers.narrow.adjusted.regions.filtered')

Passes FDR control: 7


Unnamed: 0,modification,file,normalization,pvalue,pvalue_corr,loci
0,H3K27ac,/mnt/stripe/bio/experiments/signal_experiments...,fripz,1e-05,0.00226,cpg_minavcov10_complex_4outliers.narrow.adjust...
1,H3K27ac,/mnt/stripe/bio/experiments/signal_experiments...,fripz,1e-05,0.00226,diffReps_H3K27ac_both
2,H3K27me3,/mnt/stripe/bio/experiments/signal_experiments...,fripz,1e-05,0.00226,diffReps_H3K27me3_both
3,H3K27me3,/mnt/stripe/bio/experiments/signal_experiments...,fripz,1e-05,0.00226,diffReps_broad_input_H3K27me3_both
4,H3K27me3,/mnt/stripe/bio/experiments/signal_experiments...,fripz,1e-05,0.00226,diff_OD_YD_H3K27me3_zinbra_200_0.01_10_both
5,H3K4me1,/mnt/stripe/bio/experiments/signal_experiments...,fripz,1e-05,0.00226,diff_OD_YD_H3K4me1_zinbra_200_1.0E-4_10_both
6,H3K4me3,/mnt/stripe/bio/experiments/signal_experiments...,fripz,1e-05,0.00226,diff_OD_YD_H3K4me3_zinbra_200_0.05_5_both


In [71]:
# Rows passing FDR control (default threshold 0.05) for the "fripm" normalization.
fdr_control("fripm")

Results file: /mnt/stripe/bio/experiments/signal_experiments/validate.fripm.pvalue.100001.csv [exists]
First pvalue failed FDR control:
('modification', 'H3K27me3')
('file', '/mnt/stripe/bio/experiments/signal_experiments/H3K27me3/diffReps_H3K27me3_both')
('normalization', 'fripm')
('pvalue', 0.0009699806003879922)
('pvalue_corr', 0.066717796078861025)
('loci', 'diffReps_H3K27me3_both')

Passes FDR control: 22


Unnamed: 0,modification,file,normalization,pvalue,pvalue_corr,loci
0,H3K27ac,/mnt/stripe/bio/experiments/signal_experiments...,fripm,1e-05,0.000833,cpg_minavcov10_complex_4outliers.narrow.adjust...
1,H3K27ac,/mnt/stripe/bio/experiments/signal_experiments...,fripm,1e-05,0.000833,diff_OD_YD_H3K27ac_zinbra_200_1.0E-4_10_young
2,H3K27ac,/mnt/stripe/bio/experiments/signal_experiments...,fripm,1e-05,0.000833,diff_OD_YD_H3K27ac_zinbra_200_1.0E-4_10_both
3,H3K27ac,/mnt/stripe/bio/experiments/signal_experiments...,fripm,1e-05,0.000833,diffReps_H3K27ac_old
4,H3K27ac,/mnt/stripe/bio/experiments/signal_experiments...,fripm,1e-05,0.000833,diffReps_H3K27ac_both
5,H3K27ac,/mnt/stripe/bio/experiments/signal_experiments...,fripm,1e-05,0.000833,diffReps_broad_H3K27ac_both
6,H3K27me3,/mnt/stripe/bio/experiments/signal_experiments...,fripm,1e-05,0.000833,diffReps_broad_H3K27me3_young
7,H3K27me3,/mnt/stripe/bio/experiments/signal_experiments...,fripm,1e-05,0.000833,diff_OD_YD_H3K27me3_zinbra_200_0.01_10_young
8,H3K27me3,/mnt/stripe/bio/experiments/signal_experiments...,fripm,1e-05,0.000833,diff_OD_YD_H3K27me3_zinbra_200_0.01_10_old
9,H3K27me3,/mnt/stripe/bio/experiments/signal_experiments...,fripm,1e-05,0.000833,diffReps_broad_input_H3K27me3_both
