fixed get_sex plotting options and added testing

FoxoTech · Feb 8, 2022 · be6b50f · be6b50f
1 parent 0f2381a
commit be6b50f
Show file tree

Hide file tree

Showing 4 changed files with 60 additions and 21 deletions.
diff --git a/methylcheck/predict/sex.py b/methylcheck/predict/sex.py
@@ -187,25 +187,33 @@ def get_sex(data_source, array_type=None, verbose=False, plot=False, save=False,
     if data_source_type in ('path'):
         output = _fetch_actual_sex_from_sample_sheet_meta_data(data_source, output)
 
-    if plot == True or return_fig == True:
-        fig = _plot_predicted_sex(data=output, # 'x_median', 'y_median', 'predicted_sex', 'X_fail_percent', 'Y_fail_percent'
-            sample_failure_percent=sample_failure_percent,
-            median_cutoff=median_cutoff,
-            include_probe_failure_percent=include_probe_failure_percent,
-            verbose=verbose,
-            save=save,
-            poobah_cutoff=poobah_cutoff,
-            custom_label=custom_label,
-            data_source_type=data_source_type,
-            data_source=data_source,
-            return_fig=return_fig,
-            return_labels=return_labels,
-            )
-        if return_labels:
-            return fig # these are a lookup dictionary of labels
-    if return_fig:
-        return fig
-    return output
+    if plot == False and return_fig == False and return_labels == False:
+        return output
+
+    # reached only when at least one of plot, return_fig, or return_labels was requested
+    fig_or_labels = _plot_predicted_sex(data=output, # 'x_median', 'y_median', 'predicted_sex', 'X_fail_percent', 'Y_fail_percent'
+        sample_failure_percent=sample_failure_percent,
+        median_cutoff=median_cutoff,
+        include_probe_failure_percent=include_probe_failure_percent,
+        verbose=verbose,
+        save=save,
+        poobah_cutoff=poobah_cutoff,
+        custom_label=custom_label,
+        data_source_type=data_source_type,
+        data_source=data_source,
+        return_fig=return_fig,
+        return_labels=return_labels,
+        )
+    if return_labels:
+        return fig_or_labels # this is a lookup dictionary of labels, not the plt.gcf() figure
+    elif plot == False and return_fig == True:
+        return fig_or_labels # seaborn.axisgrid.FacetGrid object
+    elif plot == True and return_fig == True:
+        plt.show()
+        return fig_or_labels # seaborn.axisgrid.FacetGrid object
+    elif plot == True and return_fig == False:
+        return # plot-only request (return_fig and return_labels both False): already plotted, so nothing (None) is returned
+
 
 
 def _plot_predicted_sex(data=pd.DataFrame(),

diff --git a/methylcheck/qc_plot.py b/methylcheck/qc_plot.py
@@ -313,7 +313,8 @@ def _get_data(data_containers=None, path=None, compare=False, noob=True, verbose
                         _unmeth = pd.merge(left=_unmeth, right=sample[f'{n2}unmeth'], left_on='IlmnID', right_on=sample['IlmnID'])
                         _unmeth = _unmeth.rename(columns={f'{n2}unmeth': sample_filenames[idx]})
             else:
-                print(f"{len(csvs)} processed samples found in {path} using NOOB: {noob}.")
+                if verbose:
+                    print(f"{len(csvs)} processed samples found in {path} using NOOB: {noob}.")
                 if files_found:
                     data_columns = "NOOB meth/unmeth" if noob else "non-NOOB-corrected meth/unmeth"
                     print(f"processed files found, but did not contain the right data ({data_columns})")

diff --git a/tests/predict/test_sex.py b/tests/predict/test_sex.py
@@ -19,6 +19,7 @@
 PROCESSED_450K = Path('docs/example_data/GSE69852') # partial data
 MOUSE_TEST = Path('docs/example_data/mouse_test') # partial data
 
+
 @patch("methylcheck.qc_plot.plt.show")
 def test_get_sex_plot_label_compare_with_actual_450k(mock):
     meta = pd.read_pickle(Path(PROCESSED_450K,'sample_sheet_meta_data.pkl'))
@@ -68,3 +69,32 @@ def test_get_actual_sex():
         with pytest.raises(KeyError) as excinfo:
             df4 = methylcheck.predict.sex._fetch_actual_sex_from_sample_sheet_meta_data(LOCAL,df3)
             assert excinfo.value.message == "Could not read actual sex from meta data to compare."
+
+@patch("methylcheck.qc_plot.plt.show")
+def test_get_sex_plot_return_fig_labels(mock):
+    """ verifies what get_sex returns for each combination of plot, return_fig, and return_labels; cannot check if show() worked, because it is patched. """
+    import seaborn
+    from io import StringIO
+    fig = methylcheck.get_sex(MOUSE_TEST, return_fig=False, plot=True)
+    if not isinstance(fig, type(None)):
+        raise AssertionError("return_fig was false, but returned something, instead of None")
+    fig = methylcheck.get_sex(MOUSE_TEST, return_fig=True, plot=True)
+    if not isinstance(fig, seaborn.axisgrid.FacetGrid):
+        raise AssertionError("return_fig was True, but did not return figure")
+    fig = methylcheck.get_sex(MOUSE_TEST, return_fig=True, plot=False)
+    if not isinstance(fig, seaborn.axisgrid.FacetGrid):
+        raise AssertionError("return_fig was True, but did not return figure")
+    labels = methylcheck.get_sex(MOUSE_TEST, return_fig=False, plot=False, return_labels=True)
+    ref_labels = {'204879580038_R01C02': 'A', '204879580038_R02C02': 'B', '204879580038_R03C02': 'C', '204879580038_R04C02': 'D', '204879580038_R05C02': 'E', '204879580038_R06C02': 'F'}
+    if labels != ref_labels:
+        raise AssertionError("return_labels did not match expected output")
+    default = methylcheck.get_sex(MOUSE_TEST)
+    ref_df = pd.read_csv(StringIO("""sample\tx_median\ty_median\tpredicted_sex\tX_fail_percent\tY_fail_percent\tactual_sex\tsex_matches
+204879580038_R01C02\t12.0\t7.4\t             F\t             2.9\t            28.0\t          M\t           0
+204879580038_R02C02\t12.5\t7.8\t             F\t             2.5\t            27.9\t          M\t           0
+204879580038_R03C02\t12.1\t7.5\t             F\t             5.4\t            28.2\t          M\t           0
+204879580038_R04C02\t12.0\t7.4\t             F\t             5.6\t            28.2\t          M\t           0
+204879580038_R05C02\t12.1\t7.6\t             F\t             5.6\t            28.2\t          M\t           0
+204879580038_R06C02\t11.9\t7.3\t             F\t             7.1\t            28.3\t          M\t           0"""), sep='\t').set_index('sample')
+    if not default[['x_median','y_median']].equals(ref_df[['x_median','y_median']]):
+        raise AssertionError("default output (dataframe of predicted sexes) did not match reference data")
diff --git a/tests/samples/test_sample.py b/tests/samples/test_sample.py
@@ -2,7 +2,7 @@
 import pandas as pd
 import methylprep # for manifest support
 from pathlib import Path
-PATH = Path('docs/example_data/mouse')
+PATH = Path('docs/example_data/mouse/')
 
 class TestProcessedSample():
     manifest = methylprep.Manifest(methylprep.ArrayType('mouse'))