In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

from pathlib import Path
import pandas as pd
import numpy as np
import math

import seaborn as sns

from downstream.signals.signal_r2_permutation_test import collect_paths
from downstream.signals.signal_pca_fit_error_pvalue_permutation_test import process

This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.



In [5]:
# Root directory of the signal experiments: it is scanned by `collect_paths`
# and is also where `pvalue_for` writes its "validate.*.pvalue.*.csv" reports.
signals_root = Path("/mnt/stripe/bio/experiments/signal_experiments")

Selected Paths:  1582 of 19932


In [12]:
def pvalue_for(normalization, simulations=4, threads=8):
    """Return the path of the p-value report for ``normalization``.

    The report lives in ``signals_root`` and acts as an on-disk cache: it is
    only (re)computed via ``process`` when the file does not exist yet.

    Parameters
    ----------
    normalization : str
        Normalization tag (the notebook uses e.g. "fripz", "rawq", "fripm").
        Signal files whose name contains ``"_<normalization>.tsv"`` are
        selected for processing.
    simulations : int, optional
        Number of simulations forwarded to ``process``. It is part of the
        report file name, so every value gets its own cached report.
        Defaults to 4, the value previously hard-coded here (a commented-out
        alternative of 100001 was kept next to it).
    threads : int, optional
        Thread count forwarded to ``process``. Not part of the report file
        name. Defaults to 8, the value previously hard-coded here.

    Returns
    -------
    pathlib.Path
        Location of the (existing or freshly requested) report file.
    """
    report_name = "validate.{}.pvalue.{}.csv".format(normalization, simulations)
    output_path = signals_root / report_name

    # Check the cache once so the status message and the decision agree.
    already_computed = output_path.exists()
    print("Results file:", str(output_path), "[exists]" if already_computed else "[not exists]")

    if not already_computed:
        print("  calculate:", str(output_path.name))

        # Collect files: keep only the signal tables of this normalization.
        paths_filter = "_{}.tsv".format(normalization)
        all_paths = collect_paths(signals_root)
        paths = [p for p in all_paths if paths_filter in p.name]
        print("Selected Paths: ", len(paths), "of", len(all_paths))

        # Calc pvalues: fixed seed keeps the permutation test reproducible.
        process(paths, str(output_path), seed=100, simulations=simulations, threads=threads, fdr=True)

    return output_path

In [13]:
# Compute (or reuse the cached) report for every normalization of interest,
# in the same order as before, and keep all resulting paths instead of
# overwriting a single variable.
df_paths = {
    normalization_tag: pvalue_for(normalization_tag)
    for normalization_tag in ("fripz", "rawq", "fripm")
}
# Backward compatibility: `df_path` used to end up holding the last report path.
df_path = df_paths["fripm"]

Results file: /mnt/stripe/bio/experiments/signal_experiments/validate.fripz.pvalue.4.csv [not exists]
  calculate: validate.fripz.pvalue.4.csv
Selected Paths:  1582 of 19932
--- [1 / 1582] -----------
Process: /mnt/stripe/bio/experiments/signal_experiments/H3K27ac/washu_german_rrbs_filtered_dmrs_all_10.hg19/washu_german_rrbs_filtered_dmrs_all_10.hg19_fripz.tsv
[ACTUAL]: 8, [SIMUL]: [min, max] = [16, 16], [2%, 98%] = [8.64, 16.0]; 50% = 16.0, p-value: 0.2
--- [2 / 1582] -----------
Process: /mnt/stripe/bio/experiments/signal_experiments/H3K27ac/cd14_chromhmm18.hg19.11_EnhWk/cd14_chromhmm18.hg19.11_EnhWk_fripz.tsv
[ACTUAL]: 16, [SIMUL]: [min, max] = [18, 18], [2%, 98%] = [16.16, 18.0]; 50% = 18.0, p-value: 0.2
--- [3 / 1582] -----------
Process: /mnt/stripe/bio/experiments/signal_experiments/H3K27ac/cd14_chromhmm18.hg19.13_Het/cd14_chromhmm18.hg19.13_Het_fripz.tsv
[ACTUAL]: 15, [SIMUL]: [min, max] = [15, 15], [2%, 98%] = [15.0, 15.0]; 50% = 15.0, p-value: 1.0
--- [4 / 1582] -----------
P

KeyboardInterrupt: 

In [36]:
from statsmodels.stats.multitest import multipletests

# Normalization to inspect; other tags tried in this notebook:
# "fripm", "rawq", "scoresz".
normalization = "fripz"

# `DataFrame.from_csv` is deprecated (removed in pandas 1.0); `read_csv` with
# `index_col=0` keeps the first column as the index like `from_csv` did.
# NOTE(review): `from_csv` also tried to parse the index as dates; the index
# shown in the output is a modification name, so that is dropped — confirm.
report_path = signals_root / "H3K4me1" / "report.permutation_pvalue.csv"
df = pd.read_csv(str(report_path), index_col=0)

# `.copy()` so that adding columns below does not write into a slice of the
# full report (SettingWithCopyWarning).
df = df[df["normalization"] == normalization].copy()
df["loci"] = [Path(f).name for f in df["file"]]

# Multiple-testing correction of the raw p-values.
# Other methods considered: holm-sidak, bonferroni.
_reject, pvalues_corrected, *_ = multipletests(
    pvals=df["pvalue"],
    alpha=0.05, method="fdr_bh"
)
df["pvalue_corr"] = pvalues_corrected

# `sort_values` returns a new frame, so the result must be assigned back;
# the raw p-value breaks ties between equal corrected values.
df = df.sort_values(by=["pvalue_corr", "pvalue"])
df.head(10)

Unnamed: 0_level_0,file,normalization,pvalue,loci,pvalue_corr
modification,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
H3K4me1,/mnt/stripe/bio/experiments/signal_experiments...,fripz,1e-05,diff_OD_YD_H3K4me1_zinbra_200_1.0E-4_10_both,0.00316
H3K4me1,/mnt/stripe/bio/experiments/signal_experiments...,fripz,0.00024,cpg_minavcov10_complex_4outliers.narrow.adjust...,0.03792
H3K4me1,/mnt/stripe/bio/experiments/signal_experiments...,fripz,0.00577,washu_german_rrbs_filtered_dmrs_all_10.hg19,0.432809
H3K4me1,/mnt/stripe/bio/experiments/signal_experiments...,fripz,0.00789,diffReps_H3K4me1_both,0.432809
H3K4me1,/mnt/stripe/bio/experiments/signal_experiments...,fripz,0.01151,diffReps_H3K4me1_young,0.432809
H3K4me1,/mnt/stripe/bio/experiments/signal_experiments...,fripz,0.01771,cd14_tf_PU.1-IL4_GSM1681426_hg19_q0.05_narrow,0.432809
H3K4me1,/mnt/stripe/bio/experiments/signal_experiments...,fripz,0.02201,diffReps_broad_input_H3K4me1_both,0.432809
H3K4me1,/mnt/stripe/bio/experiments/signal_experiments...,fripz,0.03708,cd14v2_chromhmm18.hg19.4_TssFlnkD,0.432809
H3K4me1,/mnt/stripe/bio/experiments/signal_experiments...,fripz,0.03924,repeats_hg19_Satellite,0.432809
H3K4me1,/mnt/stripe/bio/experiments/signal_experiments...,fripz,0.04159,cd14_chromhmm18.hg19.4_TssFlnkD,0.432809
