From 1c481a1791860307086945356817116b46e6d35d Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 7 Sep 2020 11:27:33 +0200 Subject: [PATCH 1/8] adding converter model and one test --- NIPTool/build/sample.py | 26 +++++++++++++------ NIPTool/constants/__init__.py | 0 NIPTool/{constants => models}/constants.py | 0 NIPTool/models/converters.py | 12 +++++++++ .../{nipt_results.py => validation_schema.py} | 0 NIPTool/parse/batch.py | 2 +- 6 files changed, 31 insertions(+), 9 deletions(-) delete mode 100644 NIPTool/constants/__init__.py rename NIPTool/{constants => models}/constants.py (100%) create mode 100644 NIPTool/models/converters.py rename NIPTool/models/{nipt_results.py => validation_schema.py} (100%) diff --git a/NIPTool/build/sample.py b/NIPTool/build/sample.py index e92bcdff..33312d5d 100644 --- a/NIPTool/build/sample.py +++ b/NIPTool/build/sample.py @@ -1,20 +1,30 @@ -from NIPTool.constants.constants import SAMPLE_KEYS +from NIPTool.models.constants import SAMPLE_KEYS +from NIPTool.models.converters import CONVERTERS + + +def convert(key, value): + """Convert values according to the converter model""" + + if value is None: + return value + + for function, keys in CONVERTERS.items(): + if key in keys: + return function(value) + + return value + def build_sample(sample_data: dict): """Builds a document for the sample collection""" + sample = {"_id": sample_data.get("SampleID")} - sample = {'_id': sample_data.get('SampleID')} - for key in SAMPLE_KEYS: value = sample_data.get(key) - if isinstance(value, str) and not value.strip(): - continue + value = convert(key, value) if value is None: continue sample[key] = value - - if sample.get('SampleProject'): - sample['SampleProject'] = str(sample['SampleProject']) return sample diff --git a/NIPTool/constants/__init__.py b/NIPTool/constants/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/NIPTool/constants/constants.py b/NIPTool/models/constants.py similarity index 100% rename from NIPTool/constants/constants.py rename to NIPTool/models/constants.py diff --git a/NIPTool/models/converters.py b/NIPTool/models/converters.py new file mode 100644 index 00000000..a3998f89 --- /dev/null +++ b/NIPTool/models/converters.py @@ -0,0 +1,12 @@ +def empty_str(x): + """Convert empty string to None""" + + if not isinstance(x, str): + return x + x = x.strip() + if not x: + return None + return x + + +CONVERTERS = {str: ["SampleProject"], empty_str: ["SampleType", "Description"]} diff --git a/NIPTool/models/nipt_results.py b/NIPTool/models/validation_schema.py similarity index 100% rename from NIPTool/models/nipt_results.py rename to NIPTool/models/validation_schema.py diff --git a/NIPTool/parse/batch.py b/NIPTool/parse/batch.py index 1851439b..a65a4081 100644 --- a/NIPTool/parse/batch.py +++ b/NIPTool/parse/batch.py @@ -2,7 +2,7 @@ import pandas as pd import glob from NIPTool.exeptions import MissingResultsError, FileValidationError -from NIPTool.models.nipt_results import nipt_results_schema +from NIPTool.models.validation_schema import nipt_results_schema LOG = logging.getLogger(__name__) From 7a3afeb149a5c6108676cf0b64998e06510cd436 Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 7 Sep 2020 12:07:42 +0200 Subject: [PATCH 2/8] more tests --- NIPTool/parse/batch.py | 2 +- tests/build/test_build_sample.py | 51 +++++++++++++++++---------- tests/conftest.py | 20 +++++++++++ tests/fixtures/not_a_valid_fluffy.csv | 4 +++ tests/fixtures/valid_fluffy.csv | 4 +++ tests/parse/test_parse_batch.py | 34 ++++++++++++++++++ 6 files changed, 96 insertions(+), 19 deletions(-) create mode 100644 tests/conftest.py create mode 100644 tests/fixtures/not_a_valid_fluffy.csv create mode 100644 tests/fixtures/valid_fluffy.csv create mode 100644 tests/parse/test_parse_batch.py diff --git a/NIPTool/parse/batch.py b/NIPTool/parse/batch.py index a65a4081..e8267a09 100644 --- a/NIPTool/parse/batch.py +++ b/NIPTool/parse/batch.py @@ -7,7 +7,7 @@ LOG = logging.getLogger(__name__) -def parse_batch_file(nipt_results_path: dict) -> list: +def parse_batch_file(nipt_results_path: str) -> list: if not glob.glob(nipt_results_path): raise MissingResultsError("Results file missing.") diff --git a/tests/build/test_build_sample.py b/tests/build/test_build_sample.py index 0dd3ac4d..0fcf6f0f 100644 --- a/tests/build/test_build_sample.py +++ b/tests/build/test_build_sample.py @@ -1,24 +1,39 @@ -from NIPTool.build.sample import build_sample -import pytest +from NIPTool.build.sample import build_sample +import pytest -def test_build_sample(): - # GIVEN a sample_data with requiered key 'SampleID' - sample_data = {"SampleID": "2020-07452-02", - "SampleType": " ", - "Description": " ", - "SampleProject": 201862, - "Zscore_13": -10.1836097044367} +def test_build_sample(): + # GIVEN a sample_data with requiered key 'SampleID' + sample_data = { + "SampleID": "2020-07452-02", + "Description": " ", + "SampleProject": 201862, + "Zscore_13": -10.1836097044367, + } - # WHEN building a mongo application tag - mongo_application_tag = build_sample(sample_data = sample_data) + # WHEN building a mongo sample + mongo_sample = build_sample(sample_data=sample_data) - # THEN assert mongo_application_tag is - # {"_id": "2020-07452-02","SampleID": "2020-07452-02", - # "SampleProject": "201862","Zscore_13": -10.1836097044367} + # THEN the mongo_sample has a key "_id" with the value of "SampleID" + assert mongo_sample == { + "_id": "2020-07452-02", + "SampleID": "2020-07452-02", + "SampleProject": "201862", + "Zscore_13": -10.1836097044367, + } - assert mongo_application_tag == {"_id": "2020-07452-02", - "SampleID": "2020-07452-02", - "SampleProject": "201862", - "Zscore_13": -10.1836097044367} +def test_build_sample_wrong_keys(): + # GIVEN a sample_data with not accepted keys: key1 key2 key3 + sample_data = { + "SampleID": "2020-07452-02", + "key1": " ", + "key2": 201862, + "key3": -10.1836097044367, + } + + # WHEN building a mongo sample + mongo_sample = build_sample(sample_data=sample_data) + + # THEN the unaccepted keys will not be part of the mongo_sample" + assert mongo_sample == {"_id": "2020-07452-02", "SampleID": "2020-07452-02"} diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..8cf8cc98 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,20 @@ +import pytest + + +########################################## +###### fixture files for input csv ###### +########################################## + + +@pytest.fixture +def valid_csv(): + """Get file path to valid csv""" + + return 'tests/fixtures/valid_fluffy.csv' + + +@pytest.fixture +def invalid_csv(): + """Get file path to invalid csv""" + + return 'tests/fixtures/not_a_valid_fluffy.csv' \ No newline at end of file diff --git a/tests/fixtures/not_a_valid_fluffy.csv b/tests/fixtures/not_a_valid_fluffy.csv new file mode 100644 index 00000000..c313d2c1 --- /dev/null +++ b/tests/fixtures/not_a_valid_fluffy.csv @@ -0,0 +1,4 @@ +o_18,Ratio_21,Ratio_X,Ratio_Y,MappedReads,GC_Dropout,AT_Dropout,Chr1_Ratio,Chr2_Ratio,Chr3_Ratio,Chr4_Ratio,Chr5_Ratio,Chr6_Ratio,Chr7_Ratio,Chr8_Ratio,Chr9_Ratio,Chr10_Ratio,Chr11_Ratio,Chr12_Ratio,Chr14_Ratio,Chr15_Ratio,Chr16_Ratio,Chr17_Ratio,Chr19_Ratio,Chr20_Ratio,Chr22_Ratio,Chr1,Chr2,Chr3,Chr4,Chr5,Chr6,Chr7,Chr8,Chr9,Chr10,Chr11,Chr12,Chr13,Chr14,Chr15,Chr16,Chr17,Chr18,Chr19,Chr20,Chr21,Chr22,ChrX,ChrY,Median_13,Median_18,Median_21,Median_X,Median_Y,Stdev_13,Stdev_18,Stdev_21,Stdev_X,Stdev_Y,FF_Formatted,FFY,FFX,DuplicationRate,Bin2BinVariance,UnfilteredCNVcalls,CNVSegment + ,2020-03-10,,,,201862,CAGGTTCC,CGAACACC,,,1.56645731780181,5.7935003491953,-0.034484656889219,-22.4484387749091,1.00392976488,1.0145378044,0.999592517988,0.895534547566,0.00010705927715,29208118,1.2E-05,33.401583,0.995645196832,0.993212778561,0.962823087616,1.06202342547,0.99049923318,1.00919569724,1.03740345147,0.98228215486,0.980352236205,0.990497684064,0.961517632949,1.01303209758,0.986809900603,0.995026518647,0.996175293127,1.03451169241,1.04821216494,0.95282466333,1.04782507766,2029934,1858430,1420819,1157784,1259965,1230164,1245957,1088639,967314,1175662,1072024,1072024,626410,735444,748838,876563,935427,569534,745983,662560,297366,486441,1010813,3127,0.994623404680424,1.01950547134618,1.00710552415657,0.840822847230173,0.000112249820909,0.014076824975767,0.009517510060085,0.006751174544696,0.029800076293786,6.53186791196846E-05,9.804,0.0,10.44,0.1005291748,0.024194141925153,47,Found +,2020-03-10,,,,201862,TTACCTGG,ACGATTGC,,,1.38366647361139,5.81331308777849,3.70734623274885,-35.5689808856014,1.00345380527,1.01458753978,1.01449670237,0.834596652286,0.000183798072485,19619357,0.000187,33.815994,0.990601777174,0.995167604367,0.962072961919,1.05777118464,0.993385479259,1.01240316831,1.03903325686,0.991366632222,0.983488534995,0.993293317385,0.95505612457,1.01314923931,0.973499284889,0.991858047435,0.988736547703,1.03613747718,1.05346928988,0.947528223568,1.03829176495,1354513,1243481,947437,772255,840547,827926,833468,730865,648361,787219,716799,716799,419651,486482,500012,580410,626225,378761,503768,442069,201359,324729,645131,3606,0.994623404680424,1.01950547134618,1.00710552415657,0.840822847230173,0.000112249820909,0.014076824975767,0.009517510060085,0.006751174544696,0.029800076293786,6.53186791196846E-05,10.56,4.6048225214,17.59,0.103652428568,0.023080843735203,42,Found +2020-07455-02,2020-03-10,,,,201862,CTATCATT,GACCACCT,,,1.19771475047014,5.9936781609917,0.172213067367254,-27.3372140187147,1.00296961522,1.01504030531,1.00041582109,0.872828803296,0.000101746653098,30035386,9.7E-05,34.339258,0.994369406522,0.994050565447,0.963985487377,1.06586994,0.991903746769,1.01417054338,1.04011435706,0.989841130219,0.987191423231,0.990561923283,0.952624100278,1.01511372968,0.975874349841,0.997388670577,0.996128439123,1.04577514591,1.06297625217,0.947324666538,1.04127965945,2011620,1827738,1395600,1133559,1235029,1212946,1232408,1076087,965009,1163613,1060576,1060576,617068,723824,745525,876814,949260,556232,772443,660420,298174,492289,980817,3056,0.994623404680424,1.01950547134618,1.00710552415657,0.840822847230173,0.000112249820909,0.014076824975767,0.009517510060085,0.006751174544696,0.029800076293786,6.53186791196846E-05,10.87,0.0,12.81,0.100858034586,0.026603482197962,40,HEJ \ No newline at end of file diff --git a/tests/fixtures/valid_fluffy.csv b/tests/fixtures/valid_fluffy.csv new file mode 100644 index 00000000..c8f9aec4 --- /dev/null +++ b/tests/fixtures/valid_fluffy.csv @@ -0,0 +1,4 @@ +SampleID,SequencingDate,SampleType,Flowcell,Description,SampleProject,Index1,Index2,Library_nM,QCFlag,Zscore_13,Zscore_18,Zscore_21,Zscore_X,Ratio_13,Ratio_18,Ratio_21,Ratio_X,Ratio_Y,MappedReads,GC_Dropout,AT_Dropout,Chr1_Ratio,Chr2_Ratio,Chr3_Ratio,Chr4_Ratio,Chr5_Ratio,Chr6_Ratio,Chr7_Ratio,Chr8_Ratio,Chr9_Ratio,Chr10_Ratio,Chr11_Ratio,Chr12_Ratio,Chr14_Ratio,Chr15_Ratio,Chr16_Ratio,Chr17_Ratio,Chr19_Ratio,Chr20_Ratio,Chr22_Ratio,Chr1,Chr2,Chr3,Chr4,Chr5,Chr6,Chr7,Chr8,Chr9,Chr10,Chr11,Chr12,Chr13,Chr14,Chr15,Chr16,Chr17,Chr18,Chr19,Chr20,Chr21,Chr22,ChrX,ChrY,Median_13,Median_18,Median_21,Median_X,Median_Y,Stdev_13,Stdev_18,Stdev_21,Stdev_X,Stdev_Y,FF_Formatted,FFY,FFX,DuplicationRate,Bin2BinVariance,UnfilteredCNVcalls,CNVSegment +2020-07547-02,2020-03-10,,,,201862,CAGGTTCC,CGAACACC,,,1.56645731780181,5.7935003491953,-0.034484656889219,-22.4484387749091,1.00392976488,1.0145378044,0.999592517988,0.895534547566,0.00010705927715,29208118,1.2E-05,33.401583,0.995645196832,0.993212778561,0.962823087616,1.06202342547,0.99049923318,1.00919569724,1.03740345147,0.98228215486,0.980352236205,0.990497684064,0.961517632949,1.01303209758,0.986809900603,0.995026518647,0.996175293127,1.03451169241,1.04821216494,0.95282466333,1.04782507766,2029934,1858430,1420819,1157784,1259965,1230164,1245957,1088639,967314,1175662,1072024,1072024,626410,735444,748838,876563,935427,569534,745983,662560,297366,486441,1010813,3127,0.994623404680424,1.01950547134618,1.00710552415657,0.840822847230173,0.000112249820909,0.014076824975767,0.009517510060085,0.006751174544696,0.029800076293786,6.53186791196846E-05,9.804,0.0,10.44,0.1005291748,0.024194141925153,47,Found +2020-07649-02,2020-03-10,,,,201862,TTACCTGG,ACGATTGC,,,1.38366647361139,5.81331308777849,3.70734623274885,-35.5689808856014,1.00345380527,1.01458753978,1.01449670237,0.834596652286,0.000183798072485,19619357,0.000187,33.815994,0.990601777174,0.995167604367,0.962072961919,1.05777118464,0.993385479259,1.01240316831,1.03903325686,0.991366632222,0.983488534995,0.993293317385,0.95505612457,1.01314923931,0.973499284889,0.991858047435,0.988736547703,1.03613747718,1.05346928988,0.947528223568,1.03829176495,1354513,1243481,947437,772255,840547,827926,833468,730865,648361,787219,716799,716799,419651,486482,500012,580410,626225,378761,503768,442069,201359,324729,645131,3606,0.994623404680424,1.01950547134618,1.00710552415657,0.840822847230173,0.000112249820909,0.014076824975767,0.009517510060085,0.006751174544696,0.029800076293786,6.53186791196846E-05,10.56,4.6048225214,17.59,0.103652428568,0.023080843735203,42,Found +2020-07455-02,2020-03-10,,,,201862,CTATCATT,GACCACCT,,,1.19771475047014,5.9936781609917,0.172213067367254,-27.3372140187147,1.00296961522,1.01504030531,1.00041582109,0.872828803296,0.000101746653098,30035386,9.7E-05,34.339258,0.994369406522,0.994050565447,0.963985487377,1.06586994,0.991903746769,1.01417054338,1.04011435706,0.989841130219,0.987191423231,0.990561923283,0.952624100278,1.01511372968,0.975874349841,0.997388670577,0.996128439123,1.04577514591,1.06297625217,0.947324666538,1.04127965945,2011620,1827738,1395600,1133559,1235029,1212946,1232408,1076087,965009,1163613,1060576,1060576,617068,723824,745525,876814,949260,556232,772443,660420,298174,492289,980817,3056,0.994623404680424,1.01950547134618,1.00710552415657,0.840822847230173,0.000112249820909,0.014076824975767,0.009517510060085,0.006751174544696,0.029800076293786,6.53186791196846E-05,10.87,0.0,12.81,0.100858034586,0.026603482197962,40,Found \ No newline at end of file diff --git a/tests/parse/test_parse_batch.py b/tests/parse/test_parse_batch.py new file mode 100644 index 00000000..65aec0e1 --- /dev/null +++ b/tests/parse/test_parse_batch.py @@ -0,0 +1,34 @@ + +from NIPTool.parse.batch import parse_batch_file +import pytest +from NIPTool.exeptions import MissingResultsError, FileValidationError + +def test_build_application_tag(valid_csv): + # GIVEN a valid csv file + + # WHEN running parse_batch_file + results = parse_batch_file(valid_csv) + + # THEN assert results is a list and it has length 3 + assert isinstance(results, list) + assert len(results)==3 + + +def test_parse_batch_file_with_missing_data(invalid_csv): + # GIVEN a csv file with missing SampleID + + # WHEN running parse_batch_file + + # THEN assert FileValidationError + with pytest.raises(FileValidationError): + parse_batch_file(invalid_csv) + + +def test_parse_batch_file_with_missing_file(): + # GIVEN a non existing csv file + + # WHEN running parse_batch_file + + # THEN assert MissingResultsError + with pytest.raises(MissingResultsError): + parse_batch_file('file_path') \ No newline at end of file From 772d53b29e5947c54b0dee059fc1158cdd497635 Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 7 Sep 2020 12:51:07 +0200 Subject: [PATCH 3/8] more tests --- NIPTool/build/batch.py | 2 +- tests/build/test_build_batch.py | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 tests/build/test_build_batch.py diff --git a/NIPTool/build/batch.py b/NIPTool/build/batch.py index fa7c70f8..fa3f2386 100644 --- a/NIPTool/build/batch.py +++ b/NIPTool/build/batch.py @@ -1,4 +1,4 @@ -from NIPTool.constants.constants import BATCH_KEYS +from NIPTool.models.constants import BATCH_KEYS def build_batch(batch_data: dict): diff --git a/tests/build/test_build_batch.py b/tests/build/test_build_batch.py new file mode 100644 index 00000000..0980522f --- /dev/null +++ b/tests/build/test_build_batch.py @@ -0,0 +1,37 @@ +from NIPTool.build.batch import build_batch +import pytest + + +def test_build_batch(): + # GIVEN a batch_data with requiered key 'SampleProject' + batch_data = { + "Median_18": 1.01950547134618, + "SampleProject": 201862, + "Stdev_13": 0.009517510060085, + } + + # WHEN building a mongo batch + mongo_batch = build_batch(batch_data=batch_data) + + # THEN the mongo_batch has a key "_id" with the value of "SampleProject" + assert mongo_batch == { + "_id": "201862", + "Median_18": 1.01950547134618, + "Stdev_13": 0.009517510060085, + } + + +def test_build_batch_wrong_keys(): + # GIVEN a batch_data with not accepted keys: key1 key2 key3 + batch_data = { + "SampleProject": 201862, + "key1": " ", + "key2": 201862, + "key3": -10.1836097044367, + } + + # WHEN building a mongo batch + mongo_batch = build_batch(batch_data=batch_data) + + # THEN the unaccepted keys will not be part of the mongo_batch" + assert mongo_batch == {"_id": "201862"} From 578bc00087cf913bc7495513f4495dcd3859e665 Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 7 Sep 2020 13:19:42 +0200 Subject: [PATCH 4/8] cli tests --- tests/commands/load/test_load_batch.py | 56 ++++++++++++++++++++++++++ tests/conftest.py | 25 ++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 tests/commands/load/test_load_batch.py diff --git a/tests/commands/load/test_load_batch.py b/tests/commands/load/test_load_batch.py new file mode 100644 index 00000000..09e97049 --- /dev/null +++ b/tests/commands/load/test_load_batch.py @@ -0,0 +1,56 @@ +from NIPTool.commands.load.batch import batch +from NIPTool.server import create_app +from NIPTool.commands.base import cli +from NIPTool.adapter.plugin import NiptAdapter + + +app = create_app(test=True) + + +def test_batch_valid_file(database, valid_csv): + app.db = database + app.adapter = NiptAdapter(database.client, db_name=database.name) + + # GIVEN a valid csv file with three samples + + # WHEN loading the batch file with correct foramted input string + runner = app.test_cli_runner() + result = runner.invoke(cli, ["load", "batch", "-b", valid_csv]) + + # THEN assert the new apptags should be added to the colleciton + assert app.adapter.sample_collection.estimated_document_count() == 3 + assert app.adapter.batch_collection.estimated_document_count() == 1 + + +def test_batch_invalid_file(database, invalid_csv): + app.db = database + app.adapter = NiptAdapter(database.client, db_name=database.name) + + # GIVEN a invalid csv file + + # WHEN loading the batch file with correct foramted input string + runner = app.test_cli_runner() + result = runner.invoke(cli, ["load", "batch", "-b", invalid_csv]) + + # THEN assert nothing added to sample or batch collections + # THEN assert Badly formated csv! Can not load. Exiting. + assert app.adapter.sample_collection.estimated_document_count() == 0 + assert app.adapter.batch_collection.estimated_document_count() == 0 + assert result.exit_code == 1 + + +def test_batch_no_file(database): + app.db = database + app.adapter = NiptAdapter(database.client, db_name=database.name) + + # GIVEN a invalid csv file + + # WHEN loading the batch file with correct foramted input string + runner = app.test_cli_runner() + result = runner.invoke(cli, ["load", "batch", "-b", "wrong/path"]) + + # THEN assert nothing added to sample or batch collections + # THEN assert Badly formated csv! Can not load. Exiting. + assert app.adapter.sample_collection.estimated_document_count() == 0 + assert app.adapter.batch_collection.estimated_document_count() == 0 + assert result.exit_code == 1 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 8cf8cc98..5912a902 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,30 @@ import pytest +from mongomock import MongoClient + +DATABASE = 'testdb' + +@pytest.fixture(scope='function') +def pymongo_client(request): + """Get a client to the mongo database""" + + mock_client = MongoClient() + + def teardown(): + mock_client.drop_database(DATABASE) + + request.addfinalizer(teardown) + return mock_client + + +@pytest.fixture(scope='function') +def database(request, pymongo_client): + """Get an adapter connected to mongo database""" + + mongo_client = pymongo_client + database = mongo_client[DATABASE] + return database + ########################################## ###### fixture files for input csv ###### From 03979b4f1b749ba0f11641eceef89e7c63e64145 Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 7 Sep 2020 13:58:14 +0200 Subject: [PATCH 5/8] generalizing build documents --- NIPTool/build/batch.py | 12 ------------ NIPTool/build/document.py | 32 ++++++++++++++++++++++++++++++++ NIPTool/build/sample.py | 30 ------------------------------ NIPTool/load/batch.py | 3 +-- NIPTool/models/converters.py | 26 +++++++++++++++++++++++++- tests/build/test_build_batch.py | 2 +- tests/build/test_build_sample.py | 2 +- 7 files changed, 60 insertions(+), 47 deletions(-) delete mode 100644 NIPTool/build/batch.py create mode 100644 NIPTool/build/document.py delete mode 100644 NIPTool/build/sample.py diff --git a/NIPTool/build/batch.py b/NIPTool/build/batch.py deleted file mode 100644 index fa3f2386..00000000 --- a/NIPTool/build/batch.py +++ /dev/null @@ -1,12 +0,0 @@ -from NIPTool.models.constants import BATCH_KEYS - - -def build_batch(batch_data: dict): - """Builds a document for the batch collection""" - - batch_document = {'_id': str(batch_data.get('SampleProject'))} - for key in BATCH_KEYS: - if batch_data.get(key) is not None: - batch_document[key] = batch_data.get(key) - - return batch_document \ No newline at end of file diff --git a/NIPTool/build/document.py b/NIPTool/build/document.py new file mode 100644 index 00000000..86ac26e7 --- /dev/null +++ b/NIPTool/build/document.py @@ -0,0 +1,32 @@ +from NIPTool.models.constants import SAMPLE_KEYS, BATCH_KEYS +from NIPTool.models.converters import convert + + +def build_document(csv_data: dict, document_keys: list) -> dict: + + document = {} + for key in document_keys: + value = csv_data.get(key) + value = convert(key, value) + if value is None: + continue + document[key] = value + + return document + +def build_sample(sample_data: dict) -> dict: + """Builds a document for the sample collection""" + + sample = build_document(sample_data, SAMPLE_KEYS) + sample["_id"] = sample_data.get("SampleID") + + return sample + + +def build_batch(batch_data: dict) -> dict: + """Builds a document for the batch collection""" + + batch = build_document(batch_data, BATCH_KEYS) + batch["_id"] = str(batch_data.get('SampleProject')) + + return batch \ No newline at end of file diff --git a/NIPTool/build/sample.py b/NIPTool/build/sample.py deleted file mode 100644 index 33312d5d..00000000 --- a/NIPTool/build/sample.py +++ /dev/null @@ -1,30 +0,0 @@ -from NIPTool.models.constants import SAMPLE_KEYS -from NIPTool.models.converters import CONVERTERS - - -def convert(key, value): - """Convert values according to the converter model""" - - if value is None: - return value - - for function, keys in CONVERTERS.items(): - if key in keys: - return function(value) - - return value - - -def build_sample(sample_data: dict): - """Builds a document for the sample collection""" - - sample = {"_id": sample_data.get("SampleID")} - - for key in SAMPLE_KEYS: - value = sample_data.get(key) - value = convert(key, value) - if value is None: - continue - sample[key] = value - - return sample diff --git a/NIPTool/load/batch.py b/NIPTool/load/batch.py index 9b3428d1..36cf812e 100644 --- a/NIPTool/load/batch.py +++ b/NIPTool/load/batch.py @@ -1,6 +1,5 @@ import logging -from NIPTool.build.sample import build_sample -from NIPTool.build.batch import build_batch +from NIPTool.build.document import build_sample, build_batch from NIPTool.parse.batch import parse_batch_file LOG = logging.getLogger(__name__) diff --git a/NIPTool/models/converters.py b/NIPTool/models/converters.py index a3998f89..e4843859 100644 --- a/NIPTool/models/converters.py +++ b/NIPTool/models/converters.py @@ -9,4 +9,28 @@ def empty_str(x): return x -CONVERTERS = {str: ["SampleProject"], empty_str: ["SampleType", "Description"]} +CONVERTERS = { + str: ["SampleProject"], + empty_str: [ + "SampleType", + "Description", + "Library_nM", + "Index1", + "Index2", + "CNVSegment", + "Flowcell", + "QCFlag", + ], +} + +def convert(key, value): + """Convert values according to the converter model""" + + if value is None: + return value + + for function, keys in CONVERTERS.items(): + if key in keys: + return function(value) + + return value diff --git a/tests/build/test_build_batch.py b/tests/build/test_build_batch.py index 0980522f..54d2f490 100644 --- a/tests/build/test_build_batch.py +++ b/tests/build/test_build_batch.py @@ -1,4 +1,4 @@ -from NIPTool.build.batch import build_batch +from NIPTool.build.document import build_batch import pytest diff --git a/tests/build/test_build_sample.py b/tests/build/test_build_sample.py index 0fcf6f0f..145cb522 100644 --- a/tests/build/test_build_sample.py +++ b/tests/build/test_build_sample.py @@ -1,4 +1,4 @@ -from NIPTool.build.sample import build_sample +from NIPTool.build.document import build_sample import pytest From feb1528782bf85b3bca2f63f904da600232e3d36 Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 7 Sep 2020 14:35:22 +0200 Subject: [PATCH 6/8] formating --- NIPTool/build/document.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/NIPTool/build/document.py b/NIPTool/build/document.py index 86ac26e7..d89e8af0 100644 --- a/NIPTool/build/document.py +++ b/NIPTool/build/document.py @@ -3,6 +3,7 @@ def build_document(csv_data: dict, document_keys: list) -> dict: + """Build a general document based on convert models""" document = {} for key in document_keys: @@ -14,6 +15,7 @@ def build_document(csv_data: dict, document_keys: list) -> dict: return document + def build_sample(sample_data: dict) -> dict: """Builds a document for the sample collection""" @@ -27,6 +29,6 @@ def build_batch(batch_data: dict) -> dict: """Builds a document for the batch collection""" batch = build_document(batch_data, BATCH_KEYS) - batch["_id"] = str(batch_data.get('SampleProject')) - + batch["_id"] = str(batch_data.get("SampleProject")) + return batch \ No newline at end of file From ce03371d1d1b771d951857d09f97432106bcd68b Mon Sep 17 00:00:00 2001 From: mayabrandi Date: Tue, 8 Sep 2020 09:09:45 +0200 Subject: [PATCH 7/8] Update NIPTool/models/converters.py Co-authored-by: Patrik Grenfeldt --- NIPTool/models/converters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NIPTool/models/converters.py b/NIPTool/models/converters.py index e4843859..53c8ce41 100644 --- a/NIPTool/models/converters.py +++ b/NIPTool/models/converters.py @@ -27,7 +27,7 @@ def convert(key, value): """Convert values according to the converter model""" if value is None: - return value + return None for function, keys in CONVERTERS.items(): if key in keys: From 86ff7ec391d7581a920e432aac1dcb62779ce3b0 Mon Sep 17 00:00:00 2001 From: Maya Date: Tue, 8 Sep 2020 11:13:30 +0200 Subject: [PATCH 8/8] Adding parametrized tests and removing convert model --- NIPTool/build/document.py | 17 +++++- NIPTool/models/constants.py | 1 - NIPTool/models/converters.py | 36 ------------ .../server/templates/sample/sample_tris.html | 2 +- tests/build/test_build_batch.py | 37 ++++++++++++ tests/build/test_build_sample.py | 58 ++++++++++++++++++- 6 files changed, 107 insertions(+), 44 deletions(-) delete mode 100644 NIPTool/models/converters.py diff --git a/NIPTool/build/document.py b/NIPTool/build/document.py index d89e8af0..465ab6b1 100644 --- a/NIPTool/build/document.py +++ b/NIPTool/build/document.py @@ -1,14 +1,23 @@ from NIPTool.models.constants import SAMPLE_KEYS, BATCH_KEYS -from NIPTool.models.converters import convert +from typing import Optional + +def empty_str_to_none(x: str) -> Optional[str]: + """Convert empty string to None""" + + x = x.strip() + if not x: + return None + return x def build_document(csv_data: dict, document_keys: list) -> dict: - """Build a general document based on convert models""" + """Build a general document""" document = {} for key in document_keys: value = csv_data.get(key) - value = convert(key, value) + if isinstance(value, str): + value = empty_str_to_none(value) if value is None: continue document[key] = value @@ -20,6 +29,8 @@ def build_sample(sample_data: dict) -> dict: """Builds a document for the sample collection""" sample = build_document(sample_data, SAMPLE_KEYS) + if sample.get("SampleProject"): + sample["SampleProject"] = str(sample["SampleProject"]) sample["_id"] = sample_data.get("SampleID") return sample diff --git a/NIPTool/models/constants.py b/NIPTool/models/constants.py index 6f592ad2..971cf779 100644 --- a/NIPTool/models/constants.py +++ b/NIPTool/models/constants.py @@ -1,5 +1,4 @@ SAMPLE_KEYS = [ - "SampleID", "SampleType", "Description", "SampleProject", diff --git a/NIPTool/models/converters.py b/NIPTool/models/converters.py deleted file mode 100644 index e4843859..00000000 --- a/NIPTool/models/converters.py +++ /dev/null @@ -1,36 +0,0 @@ -def empty_str(x): - """Convert empty string to None""" - - if not isinstance(x, str): - return x - x = x.strip() - if not x: - return None - return x - - -CONVERTERS = { - str: ["SampleProject"], - empty_str: [ - "SampleType", - "Description", - "Library_nM", - "Index1", - "Index2", - "CNVSegment", - "Flowcell", - "QCFlag", - ], -} - -def convert(key, value): - """Convert values according to the converter model""" - - if value is None: - return value - - for function, keys in CONVERTERS.items(): - if key in keys: - return function(value) - - return value diff --git a/NIPTool/server/templates/sample/sample_tris.html b/NIPTool/server/templates/sample/sample_tris.html index 1ac0e987..68e579a6 100644 --- a/NIPTool/server/templates/sample/sample_tris.html +++ b/NIPTool/server/templates/sample/sample_tris.html @@ -23,7 +23,7 @@ {% block scripts %}