Skip to content

Commit

Permalink
Merge pull request #8 from Clinical-Genomics/tests
Browse files Browse the repository at this point in the history
First test and some exception handling
  • Loading branch information
mayabrandi committed Sep 3, 2020
2 parents d1629da + e0729b1 commit 475fe21
Show file tree
Hide file tree
Showing 8 changed files with 171 additions and 8 deletions.
14 changes: 11 additions & 3 deletions NIPTool/build/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,18 @@
def build_sample(sample_data: dict):
    """Build a document for the sample collection.

    The document's ``_id`` is taken from ``sample_data['SampleID']``. Every
    key listed in ``SAMPLE_KEYS`` is copied over unless its value is missing
    (``None``) or is a string containing only whitespace.

    Args:
        sample_data: Raw sample values keyed by column name.

    Returns:
        dict: The sample document. ``SampleProject``, when present, is
        always stored as a string.
    """

    sample = {'_id': sample_data.get('SampleID')}

    for key in SAMPLE_KEYS:
        value = sample_data.get(key)
        if value is None:
            continue
        # Blank / whitespace-only strings carry no information; leave them out.
        if isinstance(value, str) and not value.strip():
            continue
        sample[key] = value

    # Membership (not truthiness) so a falsy project id such as 0 is
    # normalised to a string as well; absent keys are simply left absent.
    if 'SampleProject' in sample:
        sample['SampleProject'] = str(sample['SampleProject'])

    return sample
7 changes: 6 additions & 1 deletion NIPTool/commands/load/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from NIPTool.load.batch import load_one_batch
from flask.cli import with_appcontext, current_app
from datetime import date, timedelta
from NIPTool.exeptions import NIPToolError


LOG = logging.getLogger(__name__)
Expand All @@ -14,4 +15,8 @@
def batch(batch_path):
    """Load the results of one batch, read from BATCH_PATH, into the database."""
    # NOTE(review): the decorators are not visible in this hunk; if this is
    # registered as a click command the docstring above is also the --help
    # text, so keep it short and user-facing.

    try:
        load_one_batch(current_app.adapter, batch_path)
    except NIPToolError as e:
        # Known, user-facing failures: log the reason and abort the command
        # instead of surfacing a traceback. Chain the cause for debugging.
        LOG.error(e.message)
        raise click.Abort() from e
12 changes: 12 additions & 0 deletions NIPTool/exeptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@


class NIPToolError(Exception):
    """Base class for NIPTool-specific errors.

    Args:
        message: Human-readable description of what went wrong.
    """

    def __init__(self, message):
        # Initialise the Exception base explicitly so ``args`` and ``str()``
        # are populated through the documented path rather than implicitly.
        super().__init__(message)
        # Kept as a plain attribute: handlers read ``error.message``.
        self.message = message

class MissingResultsError(NIPToolError):
    """Raised when the expected results file is missing."""


class FileValidationError(NIPToolError):
    """Raised when a file's content does not pass validation."""
4 changes: 3 additions & 1 deletion NIPTool/load/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@


def load_one_batch(adapter, nipt_results_path:str):
"""Function to load one lims sample into the database"""
"""Function to load one lims sample into the database.
Raises:
MissingResultsError: when parsing file that is empty"""

batch_data = parse_batch_file(nipt_results_path)
for sample in batch_data:
Expand Down
101 changes: 101 additions & 0 deletions NIPTool/models/nipt_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from pandas_schema import Column, Schema
from pandas_schema.validation import (
LeadingWhitespaceValidation,
TrailingWhitespaceValidation,
CanConvertValidation,
InListValidation,
CustomElementValidation,
)

def _is_not_empty(element):
    """Return True unless *element* equals the empty string."""
    return element != ""


# Element-wise validator: flags every cell whose value is the empty string.
EmptyStringValidation = CustomElementValidation(
    _is_not_empty, "This field cannot be empty"
)

# Schema for a NIPT results table: one Column per expected column name,
# paired with the validators applied to that column's values. An empty
# validator list means the column is declared but its values are not checked.
# NOTE(review): if the table is read with empty cells preserved as "", the
# float/int/"Found" validators below will presumably reject blank cells —
# confirm that is intended.
nipt_results_schema = Schema(
    [
        # Identifiers: no trailing whitespace, and must not be empty.
        Column("SampleID", [TrailingWhitespaceValidation(), EmptyStringValidation]),
        Column("SampleType", []),
        Column("Description", []),
        Column(
            "SampleProject", [TrailingWhitespaceValidation(), EmptyStringValidation]
        ),
        # Declared without validation.
        Column("Index1", []),
        Column("Index2", []),
        Column("Library_nM", []),
        Column("QCFlag", []),
        # Z-scores and ratios for 13, 18, 21, X (and Y): float-convertible.
        Column("Zscore_13", [CanConvertValidation(float)]),
        Column("Zscore_18", [CanConvertValidation(float)]),
        Column("Zscore_21", [CanConvertValidation(float)]),
        Column("Zscore_X", [CanConvertValidation(float)]),
        Column("Ratio_13", [CanConvertValidation(float)]),
        Column("Ratio_18", [CanConvertValidation(float)]),
        Column("Ratio_21", [CanConvertValidation(float)]),
        Column("Ratio_X", [CanConvertValidation(float)]),
        Column("Ratio_Y", [CanConvertValidation(float)]),
        # The only column required to be int-convertible.
        Column("MappedReads", [CanConvertValidation(int)]),
        Column("GC_Dropout", [CanConvertValidation(float)]),
        Column("AT_Dropout", [CanConvertValidation(float)]),
        # Per-chromosome ratios. There is no Chr13/Chr18/Chr21 "_Ratio" column
        # here — presumably covered by Ratio_13/18/21 above; verify against
        # the file format.
        Column("Chr1_Ratio", [CanConvertValidation(float)]),
        Column("Chr2_Ratio", [CanConvertValidation(float)]),
        Column("Chr3_Ratio", [CanConvertValidation(float)]),
        Column("Chr4_Ratio", [CanConvertValidation(float)]),
        Column("Chr5_Ratio", [CanConvertValidation(float)]),
        Column("Chr6_Ratio", [CanConvertValidation(float)]),
        Column("Chr7_Ratio", [CanConvertValidation(float)]),
        Column("Chr8_Ratio", [CanConvertValidation(float)]),
        Column("Chr9_Ratio", [CanConvertValidation(float)]),
        Column("Chr10_Ratio", [CanConvertValidation(float)]),
        Column("Chr11_Ratio", [CanConvertValidation(float)]),
        Column("Chr12_Ratio", [CanConvertValidation(float)]),
        Column("Chr14_Ratio", [CanConvertValidation(float)]),
        Column("Chr15_Ratio", [CanConvertValidation(float)]),
        Column("Chr16_Ratio", [CanConvertValidation(float)]),
        Column("Chr17_Ratio", [CanConvertValidation(float)]),
        Column("Chr19_Ratio", [CanConvertValidation(float)]),
        Column("Chr20_Ratio", [CanConvertValidation(float)]),
        Column("Chr22_Ratio", [CanConvertValidation(float)]),
        # Per-chromosome values for Chr1..Chr22, ChrX, ChrY: float-convertible.
        Column("Chr1", [CanConvertValidation(float)]),
        Column("Chr2", [CanConvertValidation(float)]),
        Column("Chr3", [CanConvertValidation(float)]),
        Column("Chr4", [CanConvertValidation(float)]),
        Column("Chr5", [CanConvertValidation(float)]),
        Column("Chr6", [CanConvertValidation(float)]),
        Column("Chr7", [CanConvertValidation(float)]),
        Column("Chr8", [CanConvertValidation(float)]),
        Column("Chr9", [CanConvertValidation(float)]),
        Column("Chr10", [CanConvertValidation(float)]),
        Column("Chr11", [CanConvertValidation(float)]),
        Column("Chr12", [CanConvertValidation(float)]),
        Column("Chr13", [CanConvertValidation(float)]),
        Column("Chr14", [CanConvertValidation(float)]),
        Column("Chr15", [CanConvertValidation(float)]),
        Column("Chr16", [CanConvertValidation(float)]),
        Column("Chr17", [CanConvertValidation(float)]),
        Column("Chr18", [CanConvertValidation(float)]),
        Column("Chr19", [CanConvertValidation(float)]),
        Column("Chr20", [CanConvertValidation(float)]),
        Column("Chr21", [CanConvertValidation(float)]),
        Column("Chr22", [CanConvertValidation(float)]),
        Column("ChrX", [CanConvertValidation(float)]),
        Column("ChrY", [CanConvertValidation(float)]),
        # Remaining numeric columns: float-convertible.
        Column("FF_Formatted", [CanConvertValidation(float)]),
        Column("FFY", [CanConvertValidation(float)]),
        Column("FFX", [CanConvertValidation(float)]),
        Column("DuplicationRate", [CanConvertValidation(float)]),
        Column("Bin2BinVariance", [CanConvertValidation(float)]),
        Column("UnfilteredCNVcalls", [CanConvertValidation(float)]),
        # Only the literal value "Found" is accepted for this column.
        Column("CNVSegment", [InListValidation(["Found"])]),
        # Declared without validation.
        Column("Flowcell", []),
        Column("SequencingDate", []),
        # Medians and standard deviations for 13, 18, 21, X, Y: float-convertible.
        Column("Median_13", [CanConvertValidation(float)]),
        Column("Median_18", [CanConvertValidation(float)]),
        Column("Median_21", [CanConvertValidation(float)]),
        Column("Median_X", [CanConvertValidation(float)]),
        Column("Median_Y", [CanConvertValidation(float)]),
        Column("Stdev_13", [CanConvertValidation(float)]),
        Column("Stdev_18", [CanConvertValidation(float)]),
        Column("Stdev_21", [CanConvertValidation(float)]),
        Column("Stdev_X", [CanConvertValidation(float)]),
        Column("Stdev_Y", [CanConvertValidation(float)]),
    ]
)
16 changes: 13 additions & 3 deletions NIPTool/parse/batch.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
import logging
import pandas as pd
import glob
from NIPTool.exeptions import MissingResultsError, FileValidationError
from NIPTool.models.nipt_results import nipt_results_schema

LOG = logging.getLogger(__name__)


def parse_batch_file(nipt_results_path: str) -> list:
    """Read a NIPT results file, validate it and return its rows.

    Args:
        nipt_results_path: Path or glob pattern of the results file. When
            several files match, the first match returned by ``glob`` is used.

    Returns:
        list: One dict per row of the file, keyed by column name.

    Raises:
        MissingResultsError: If nothing matches ``nipt_results_path``.
        FileValidationError: If the content violates ``nipt_results_schema``.
    """
    # Resolve the pattern once and reuse the result.
    matches = glob.glob(nipt_results_path)
    if not matches:
        raise MissingResultsError("Results file missing.")

    # na_filter=False: pandas does not convert empty cells to NaN, so the
    # schema validators see the raw cell content.
    df = pd.read_csv(matches[0], na_filter=False)

    errors = nipt_results_schema.validate(df)

    # Log every violation before failing so all problems are visible at once.
    for err in errors:
        LOG.warning(err)

    if errors:
        raise FileValidationError("Invalid file content.")

    return df.to_dict(orient="records")
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ authlib
requests
flask_ldap3_login
pandas
pandas-schema
24 changes: 24 additions & 0 deletions tests/build/test_build_sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from NIPTool.build.sample import build_sample
import pytest


def test_build_sample():
    # GIVEN sample data with the required key 'SampleID', two
    # whitespace-only fields and a numeric 'SampleProject'
    raw_sample = {
        "SampleID": "2020-07452-02",
        "SampleType": " ",
        "Description": " ",
        "SampleProject": 201862,
        "Zscore_13": -10.1836097044367,
    }

    # WHEN building the sample document
    sample_document = build_sample(sample_data=raw_sample)

    # THEN '_id' mirrors 'SampleID', the whitespace-only fields are left
    # out and 'SampleProject' has been converted to a string
    expected_document = {
        "_id": "2020-07452-02",
        "SampleID": "2020-07452-02",
        "SampleProject": "201862",
        "Zscore_13": -10.1836097044367,
    }
    assert sample_document == expected_document

0 comments on commit 475fe21

Please sign in to comment.