Skip to content

Commit

Permalink
uses SampleSheet() in _fetch_sex and bead_array to support legacy samp…
Browse files Browse the repository at this point in the history
…le_sheets with headers (#65)
  • Loading branch information
marcmaxson committed Jan 18, 2022
1 parent 71e9e99 commit e3f4a74
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 7 deletions.
4 changes: 2 additions & 2 deletions docs/quality-control-example.ipynb
Expand Up @@ -218,7 +218,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:methylcheck.reports.bead_array:Predicting Sex...\n",
"INFO:methylcheck.reports.controls_report:Predicting Sex...\n",
"INFO:methylprep.files.manifests:Reading manifest file: MethylationEPIC_v-1-0_B4.CoreColumns.csv\n"
]
},
Expand Down Expand Up @@ -835,7 +835,7 @@
"```\n",
"\n",
"#### Notes:\n",
"- 'on_lambda': if you are running this within an AWS lambda function in the cloud, the default paths of your manifest and other files will change. When True, `on_lambda` will allow you to specify and override paths to your intput, output, and manifest files.\n",
"- 'on_lambda': if you are running this within an AWS lambda function in the cloud, the default paths of your manifest and other files will change. When True, `on_lambda` will allow you to specify and override paths to your input, output, and manifest files.\n",
"- 'path': where to read files from.\n",
"- 'outpath': in this example, `working.name` is a python `tempdir` folder in a lambda virtual environment. Everything is processed there but saved by moving to an S3 bucket. \n",
"- 'poobah': whether the QC should run on samples that have failed probes removed (recommended)\n",
Expand Down
4 changes: 4 additions & 0 deletions docs/release-history.md
@@ -1,5 +1,9 @@
# Release History

## v0.8.2
- added support for sample sheets with the legacy Illumina [Header] ... [Data] format. The controls report now
requires `methylprep` to be installed in order to run.

## v0.8.1
- .load gives clearer error when loading beta values from CSVs ('beta_csv') if probe names are not unique,
and returns a list of series for each sample when indices fail to merge (pandas.concat)
Expand Down
8 changes: 7 additions & 1 deletion methylcheck/predict/sex.py
Expand Up @@ -319,7 +319,13 @@ def _fetch_actual_sex_from_sample_sheet_meta_data(filepath, output):
loaded_files['meta'] = pd.read_pickle(filename)
break
if '.csv' in filename.suffixes:
loaded_files['meta'] = pd.read_csv(filename)
try:
from methylprep.files import SampleSheet
except ImportError:
raise ImportError("parsing a sample sheet CSV requires `methylprep` be installed first.")
#uses methylprep.files.SampleSheet() instead of --- loaded_files['meta'] = pd.read_csv(filename) --- to support legacy [header] format(s).
sample_sheet = SampleSheet(filename, filepath)
loaded_files['meta'] = sample_sheet._SampleSheet__data_frame
break
if len(loaded_files) == 1:
# methylprep v1.5.4-6 was creating meta_data files with two Sample_ID columns. Check and fix here:
Expand Down
8 changes: 7 additions & 1 deletion methylcheck/reports/bead_array.py
Expand Up @@ -124,7 +124,13 @@ def __init__(self, filepath, outfilepath=None, bg_offset=3000, cutoff_adjust=1.0
if '.pkl' in filename.suffixes:
setattr(self, 'samplesheet', pd.read_pickle(filename))
elif '.csv' in filename.suffixes:
setattr(self, 'samplesheet', pd.read_csv(filename))
try:
from methylprep.files import SampleSheet
except ImportError:
raise ImportError("parsing a sample sheet CSV requires `methylprep` be installed first.")
#uses methylprep.files.SampleSheet() instead of --- pd.read_csv(filename) --- to support legacy [header] format(s).
sample_sheet = SampleSheet(filename, filepath)
setattr(self, 'samplesheet', sample_sheet._SampleSheet__data_frame)
break

if not hasattr(self,'control'):
Expand Down
33 changes: 31 additions & 2 deletions methylcheck/samples/sklearn_mds.py
@@ -1,10 +1,39 @@
"""
Multi-dimensional Scaling (MDS)
Copied from scikit-learn v0.22.1 without changes (just all of the functions in one place for MDS to work)
author: Nelle Varoquaux <nelle.varoquaux@gmail.com>
License: BSD 3-Clause License
Copyright (c) 2007-2021 The scikit-learn developers.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""

# author: Nelle Varoquaux <nelle.varoquaux@gmail.com>
# License: BSD
__version__ = '0.22.1'

import numpy as np
Expand Down
2 changes: 1 addition & 1 deletion methylcheck/version.py
Expand Up @@ -2,4 +2,4 @@
# 1) we don't load dependencies by storing it in __init__.py
# 2) we can import it in setup.py for the same reason
# 3) we can import it into your module
__version__ = '0.8.1'
__version__ = '0.8.2'

0 comments on commit e3f4a74

Please sign in to comment.