Skip to content

Commit

Permalink
fixed get_sex plotting options and added testing
Browse files Browse the repository at this point in the history
  • Loading branch information
marcmaxson committed Feb 8, 2022
1 parent 0f2381a commit be6b50f
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 21 deletions.
46 changes: 27 additions & 19 deletions methylcheck/predict/sex.py
Expand Up @@ -187,25 +187,33 @@ def get_sex(data_source, array_type=None, verbose=False, plot=False, save=False,
if data_source_type in ('path'):
output = _fetch_actual_sex_from_sample_sheet_meta_data(data_source, output)

if plot == True or return_fig == True:
fig = _plot_predicted_sex(data=output, # 'x_median', 'y_median', 'predicted_sex', 'X_fail_percent', 'Y_fail_percent'
sample_failure_percent=sample_failure_percent,
median_cutoff=median_cutoff,
include_probe_failure_percent=include_probe_failure_percent,
verbose=verbose,
save=save,
poobah_cutoff=poobah_cutoff,
custom_label=custom_label,
data_source_type=data_source_type,
data_source=data_source,
return_fig=return_fig,
return_labels=return_labels,
)
if return_labels:
return fig # these are a lookup dictionary of labels
if return_fig:
return fig
return output
if plot == False and return_fig == False and return_labels == False:
return output

# plot, return_fig, or return_labels
fig_or_labels = _plot_predicted_sex(data=output, # 'x_median', 'y_median', 'predicted_sex', 'X_fail_percent', 'Y_fail_percent'
sample_failure_percent=sample_failure_percent,
median_cutoff=median_cutoff,
include_probe_failure_percent=include_probe_failure_percent,
verbose=verbose,
save=save,
poobah_cutoff=poobah_cutoff,
custom_label=custom_label,
data_source_type=data_source_type,
data_source=data_source,
return_fig=return_fig,
return_labels=return_labels,
)
if return_labels:
return fig_or_labels # these are a lookup dictionary of labels, not the plt.gcf() figure
elif plot == False and return_fig == True:
return fig_or_labels # seaborn.axisgrid.FacetGrid object
elif plot == True and return_fig == True:
plt.show()
return fig_or_labels # seaborn.axisgrid.FacetGrid object
elif plot == True and return_fig == False:
return # already plotted if return_labels was false and return_fig was false



def _plot_predicted_sex(data=pd.DataFrame(),
Expand Down
3 changes: 2 additions & 1 deletion methylcheck/qc_plot.py
Expand Up @@ -313,7 +313,8 @@ def _get_data(data_containers=None, path=None, compare=False, noob=True, verbose
_unmeth = pd.merge(left=_unmeth, right=sample[f'{n2}unmeth'], left_on='IlmnID', right_on=sample['IlmnID'])
_unmeth = _unmeth.rename(columns={f'{n2}unmeth': sample_filenames[idx]})
else:
print(f"{len(csvs)} processed samples found in {path} using NOOB: {noob}.")
if verbose:
print(f"{len(csvs)} processed samples found in {path} using NOOB: {noob}.")
if files_found:
data_columns = "NOOB meth/unmeth" if noob else "non-NOOB-corrected meth/unmeth"
print(f"processed files found, but did not contain the right data ({data_columns})")
Expand Down
30 changes: 30 additions & 0 deletions tests/predict/test_sex.py
Expand Up @@ -19,6 +19,7 @@
PROCESSED_450K = Path('docs/example_data/GSE69852') # partial data
MOUSE_TEST = Path('docs/example_data/mouse_test') # partial data


@patch("methylcheck.qc_plot.plt.show")
def test_get_sex_plot_label_compare_with_actual_450k(mock):
meta = pd.read_pickle(Path(PROCESSED_450K,'sample_sheet_meta_data.pkl'))
Expand Down Expand Up @@ -68,3 +69,32 @@ def test_get_actual_sex():
with pytest.raises(KeyError) as excinfo:
df4 = methylcheck.predict.sex._fetch_actual_sex_from_sample_sheet_meta_data(LOCAL,df3)
assert excinfo.value.message == "Could not read actual sex from meta data to compare."

@patch("methylcheck.qc_plot.plt.show")
def test_get_sex_plot_return_fig_labels(mock):
    """Check what get_sex() hands back for each plot / return_fig / return_labels combination.

    plt.show is patched out, so this test cannot confirm that a figure was actually displayed.
    """
    import seaborn
    from io import StringIO

    # plot only: the call is expected to return nothing.
    shown_only = methylcheck.get_sex(MOUSE_TEST, return_fig=False, plot=True)
    if shown_only is not None:
        raise AssertionError("return_fig was false, but returned something, instead of None")

    # return_fig=True must yield the FacetGrid, first with plot=True and then with plot=False.
    for plot_flag in (True, False):
        figure = methylcheck.get_sex(MOUSE_TEST, return_fig=True, plot=plot_flag)
        if not isinstance(figure, seaborn.axisgrid.FacetGrid):
            raise AssertionError("return_fig was True, but did not return figure")

    # return_labels=True must yield the sample-name -> label lookup.
    observed_labels = methylcheck.get_sex(MOUSE_TEST, return_fig=False, plot=False, return_labels=True)
    expected_labels = {
        '204879580038_R01C02': 'A',
        '204879580038_R02C02': 'B',
        '204879580038_R03C02': 'C',
        '204879580038_R04C02': 'D',
        '204879580038_R05C02': 'E',
        '204879580038_R06C02': 'F',
    }
    if observed_labels != expected_labels:
        raise AssertionError("return_labels did not match expected output")

    # With no options the dataframe of predictions comes back; only the two median columns are compared.
    predicted = methylcheck.get_sex(MOUSE_TEST)
    reference_tsv = """sample\tx_median\ty_median\tpredicted_sex\tX_fail_percent\tY_fail_percent\tactual_sex\tsex_matches
204879580038_R01C02\t12.0\t7.4\t F\t 2.9\t 28.0\t M\t 0
204879580038_R02C02\t12.5\t7.8\t F\t 2.5\t 27.9\t M\t 0
204879580038_R03C02\t12.1\t7.5\t F\t 5.4\t 28.2\t M\t 0
204879580038_R04C02\t12.0\t7.4\t F\t 5.6\t 28.2\t M\t 0
204879580038_R05C02\t12.1\t7.6\t F\t 5.6\t 28.2\t M\t 0
204879580038_R06C02\t11.9\t7.3\t F\t 7.1\t 28.3\t M\t 0"""
    expected = pd.read_csv(StringIO(reference_tsv), sep='\t').set_index('sample')
    median_columns = ['x_median', 'y_median']
    if not predicted[median_columns].equals(expected[median_columns]):
        raise AssertionError("default output (dataframe of predicted sexes) did not match reference data")
2 changes: 1 addition & 1 deletion tests/samples/test_sample.py
Expand Up @@ -2,7 +2,7 @@
import pandas as pd
import methylprep # for manifest support
from pathlib import Path
PATH = Path('docs/example_data/mouse')
PATH = Path('docs/example_data/mouse/')

class TestProcessedSample():
manifest = methylprep.Manifest(methylprep.ArrayType('mouse'))
Expand Down

0 comments on commit be6b50f

Please sign in to comment.