From 940eb8973a52ae394ff30979736fc4f86dede59a Mon Sep 17 00:00:00 2001 From: marcmaxson Date: Wed, 7 Apr 2021 14:38:13 -0600 Subject: [PATCH] beta_mds_plot requires at least 2 samples now, to avoid MDS warnings --- methylcheck/qc_report.py | 3 +-- methylcheck/samples/postprocessQC.py | 4 ++++ tests/reports/test_qc_report.py | 15 ++++++++++++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/methylcheck/qc_report.py b/methylcheck/qc_report.py index 1632384..c7387ae 100644 --- a/methylcheck/qc_report.py +++ b/methylcheck/qc_report.py @@ -303,7 +303,6 @@ def __init__(self, **kwargs): # https://stackoverflow.com/questions/8187082/how-can-you-set-class-attributes-from-variable-arguments-kwargs-in-python self.__dict__.update(kwargs) self.debug = True if self.__dict__.get('debug') == True else False - self.__dict__.pop('debug',None) self.__dict__['poobah_max_percent'] = self.__dict__.get('poobah_max_percent', 5) self.__dict__['pval_cutoff'] = self.__dict__.get('pval_cutoff', 0.05) self.errors = self.open_error_buffer() @@ -443,7 +442,7 @@ def run_qc(self): self.to_table(list_of_lists, col_names=['Sample_ID', 'Percent', 'Pass/Fail'], row_names=None, add_title='Detection Poobah') - if part == 'mds': + if part == 'mds' and len(beta_df.columns) > 1: LOGGER.info("Beta MDS Plot") # ax and df_to_retain are not used, but could go into a qc chart fig, ax, df_indexes_to_retain = methylcheck.beta_mds_plot(beta_df, silent=True, multi_params={'return_plot_obj':True, 'draw_box':True}) diff --git a/methylcheck/samples/postprocessQC.py b/methylcheck/samples/postprocessQC.py index 3309e5e..10fa43f 100644 --- a/methylcheck/samples/postprocessQC.py +++ b/methylcheck/samples/postprocessQC.py @@ -363,6 +363,9 @@ def beta_mds_plot(df, filter_stdev=1.5, verbose=False, save=False, silent=False, if verbose: logging.basicConfig(level=logging.INFO) + if len(df.columns) < 2: + LOGGER.warning("beta_mds_plot requires at least 2 samples") + return df # ensure "long format": probes in rows and samples in cols. This is how methylprep returns data. if df.shape[1] < df.shape[0]: @@ -372,6 +375,7 @@ def beta_mds_plot(df, filter_stdev=1.5, verbose=False, save=False, silent=False, if verbose: LOGGER.info(f"Your data needed to be transposed (from {pre_df_shape} to {df.shape}) to ensure probes are in columns.") original_df = df.copy() # samples in index, guaranteed. transpose at end + # require 2 or more samples for MDS # CHECK for missing probe values NaN -- this is common as of methylprep version 1.2.5 because pOOBah removes probes from samples by default. missing_probe_counts = df.isna().sum() diff --git a/tests/reports/test_qc_report.py b/tests/reports/test_qc_report.py index 139cad2..7a44186 100644 --- a/tests/reports/test_qc_report.py +++ b/tests/reports/test_qc_report.py @@ -25,9 +25,10 @@ def test_qc_run_pipeline(self): df = methylcheck.load(PROCESSED_450K) methylcheck.run_pipeline(df, exclude_all=True, plot=['all'], silent=True) + def test_ReportPDF(self): import warnings - warnings.filterwarnings('ignore') + warnings.filterwarnings("ignore", message='invalid value encountered') myreport = methylcheck.ReportPDF(path=PROCESSED_450K, outpath=PROCESSED_450K) myreport.run_qc() myreport.pdf.close() @@ -35,3 +36,15 @@ def test_ReportPDF(self): Path(PROCESSED_450K,'multipage_pdf.pdf').unlink() else: raise FileNotFoundError(Path(PROCESSED_450K,'multipage_pdf.pdf')) + + + def test_dummy(self): + import warnings + from sklearn.manifold import MDS + warnings.filterwarnings("ignore", message='invalid value encountered') + warnings.filterwarnings("ignore", category=RuntimeWarning) + #df = methylcheck.load(PROCESSED_450K) + df = pd.DataFrame(data={'9247377093_R02C01':[0.1,.2,.3,.4,.5,.6,np.nan,np.nan,0.8,0.9,1.0], + 'two':[0.1,.2,.3,.4,.5,np.nan,.6,np.nan,0.8,0.9,0.1]}) + mds = MDS(n_jobs=-1, random_state=1, verbose=1) + mds_transformed = mds.fit_transform(df.dropna().transpose().values)