diff --git a/Data_Files/Rao_et_al_2014/fieldsRao.xls b/Data_Files/Rao_et_al_2014/fieldsRao.xls index ea5ec5b4..ad445315 100644 Binary files a/Data_Files/Rao_et_al_2014/fieldsRao.xls and b/Data_Files/Rao_et_al_2014/fieldsRao.xls differ diff --git a/README.md b/README.md index 0afa459a..d80390ec 100644 --- a/README.md +++ b/README.md @@ -119,3 +119,24 @@ you to update the release number, then tag the code with that version number and push it to github, which will trigger travis to build and test and if tests pass it will deploy to production version of pypi. Note that travis will automatically deploy the new version if you push a tag to git. + +# Pytest +Every function is tested by the pytest implementation. It can be run in a terminal in the submit4dn folder by: + + py.test + +Some tests need internet access, and are labeled with the "webtest" mark. + +Some tests have file operations, and are labeled with the "file_operation" mark. + +To run the marked tests, or to exclude them from the test run, you can use the following commands: + + # Run all tests + py.test + + # Run only webtest + py.test -m webtest + + # Run only tests with file_operation + py.test -m file_operation + diff --git a/tests/data_files/Document_insert.xls b/tests/data_files/Document_insert.xls new file mode 100644 index 00000000..33c7d16f Binary files /dev/null and b/tests/data_files/Document_insert.xls differ diff --git a/tests/data_files/Exp_HiC_insert.xls b/tests/data_files/Exp_HiC_insert.xls new file mode 100644 index 00000000..a21d6032 Binary files /dev/null and b/tests/data_files/Exp_HiC_insert.xls differ diff --git a/tests/data_files/Vender_ordered_reference.xls b/tests/data_files/Vender_ordered_reference.xls deleted file mode 100644 index 7988d913..00000000 Binary files a/tests/data_files/Vender_ordered_reference.xls and /dev/null differ diff --git a/tests/data_files/Vendor.xls b/tests/data_files/Vendor.xls index 21078794..cf4d5c75 100644 Binary files a/tests/data_files/Vendor.xls and b/tests/data_files/Vendor.xls differ diff
--git a/tests/data_files/Vendor_insert.xls b/tests/data_files/Vendor_insert.xls new file mode 100644 index 00000000..62eb269e Binary files /dev/null and b/tests/data_files/Vendor_insert.xls differ diff --git a/tests/data_files/Vendor_ordered reference.xls b/tests/data_files/Vendor_ordered reference.xls new file mode 100644 index 00000000..8c983907 Binary files /dev/null and b/tests/data_files/Vendor_ordered reference.xls differ diff --git a/tests/data_files/example.fastq.gz b/tests/data_files/example.fastq.gz new file mode 100644 index 00000000..54e47c40 Binary files /dev/null and b/tests/data_files/example.fastq.gz differ diff --git a/tests/test_fdnDCIC.py b/tests/test_fdnDCIC.py index 2d1f087a..630f0b39 100644 --- a/tests/test_fdnDCIC.py +++ b/tests/test_fdnDCIC.py @@ -1,5 +1,6 @@ import wranglertools.fdnDCIC as fdnDCIC import json +import pytest # test data is in conftest.py keypairs = { @@ -23,6 +24,7 @@ def test_key(): assert isinstance(key.authid, str) +@pytest.mark.file_operation def test_key_file(): key = fdnDCIC.FDN_Key('./tests/data_files/keypairs.json', "default") assert(key) @@ -57,11 +59,13 @@ def test_FDN_url(): assert t_url == expected_url[n] +@pytest.mark.file_operation def test_md5(): md5_keypairs = fdnDCIC.md5('./tests/data_files/keypairs.json') assert md5_keypairs == "19d43267b642fe1868e3c136a2ee06f2" +@pytest.mark.webtest def test_get_FDN(connection_public): # test the schema retrival with public connection award_schema = fdnDCIC.get_FDN("/profiles/award.json", connection_public, frame="object") @@ -209,16 +213,41 @@ def test_fetch_all_items_mock(connection, mocker, returned_vendor_items): assert vendor[0].startswith("#") +def xls_to_list(xls_file, sheet): + import xlrd + return_list = [] + wb = xlrd.open_workbook(xls_file) + read_sheet = wb.sheet_by_name(sheet) + cols = read_sheet.ncols + rows = read_sheet.nrows + for row_idx in range(rows): + row_val = [] + for col_idx in range(cols): + cell_value = str(read_sheet.cell(row_idx, col_idx)) + + 
row_val.append(cell_value) + return_list.append(row_val) + return return_list + + +@pytest.mark.file_operation def test_order_FDN_mock(connection, mocker, returned_vendor_items): + vendor_file = './tests/data_files/Vendor.xls' + ordered_file = './tests/data_files/Vendor_ordered.xls' + ref_file = './tests/data_files/Vendor_ordered reference.xls' import os try: - os.remove("./tests/data_files/Vendor_ordered.xls") - except: + os.remove(ordered_file) + except OSError: pass + with mocker.patch('wranglertools.fdnDCIC.requests.get', return_value=returned_vendor_items): - fdnDCIC.order_FDN('./tests/data_files/Vendor.xls', connection) - assert os.path.isfile('./tests/data_files/Vendor_ordered.xls') + fdnDCIC.order_FDN(vendor_file, connection) + assert os.path.isfile(ordered_file) + ord_list = xls_to_list(ordered_file, "Vendor") + ref_list = xls_to_list(ref_file, "Vendor") + assert ord_list == ref_list try: - os.remove("./tests/data_files/Vendor_ordered.xls") - except: + os.remove(ordered_file) + except OSError: pass diff --git a/tests/test_get_field_info.py b/tests/test_get_field_info.py index 054ac53a..26a44f5b 100644 --- a/tests/test_get_field_info.py +++ b/tests/test_get_field_info.py @@ -1,4 +1,5 @@ import wranglertools.get_field_info as gfi +import pytest # test data is in conftest.py @@ -93,11 +94,6 @@ def test_build_field_list_embeds_with_dots(embed_properties): assert field_list[1].name.startswith('experiment_relation') -def test_get_uploadable_fields(connection_public): - field_dict = gfi.get_uploadable_fields(connection_public, ['Vendor']) - assert field_dict - - def test_get_uploadable_fields_mock(connection, mocker, returned_vendor_schema): with mocker.patch('wranglertools.fdnDCIC.requests.get', return_value=returned_vendor_schema): field_dict = gfi.get_uploadable_fields(connection, ['Vendor']) @@ -109,8 +105,9 @@ def test_get_uploadable_fields_mock(connection, mocker, returned_vendor_schema): assert field.enum is not None +@pytest.mark.file_operation def 
test_create_xls(connection, mocker, returned_vendor_schema): - xls_file = "./tests/data_files/Vendor_ordered.xls" + xls_file = "./tests/data_files/Vendor_gfi_test.xls" import os try: os.remove(xls_file) diff --git a/tests/test_import_data.py b/tests/test_import_data.py index a5429b52..cdb5a826 100644 --- a/tests/test_import_data.py +++ b/tests/test_import_data.py @@ -3,6 +3,7 @@ # test data is in conftest.py +@pytest.mark.file_operation def test_attachment_image(): attach = imp.attachment("./tests/data_files/test.jpg") assert attach['height'] == 1080 @@ -12,6 +13,7 @@ def test_attachment_image(): assert attach['href'].startswith('data:image/jpeg;base64') +@pytest.mark.file_operation def test_attachment_pdf(): attach = imp.attachment("./tests/data_files/test.pdf") assert attach['download'] == 'test.pdf' @@ -19,18 +21,21 @@ def test_attachment_pdf(): assert attach['href'].startswith('data:application/pdf;base64') +@pytest.mark.file_operation def test_attachment_image_wrong_extension(): with pytest.raises(ValueError) as excinfo: imp.attachment("./tests/data_files/test_jpeg.tiff") assert str(excinfo.value) == 'Wrong extension for image/jpeg: test_jpeg.tiff' +@pytest.mark.file_operation def test_attachment_text_wrong_extension(): with pytest.raises(ValueError) as excinfo: imp.attachment("./tests/data_files/test_txt.pdf") assert str(excinfo.value) == 'Wrong extension for text/plain: test_txt.pdf' +@pytest.mark.webtest def test_attachment_url(): import os attach = imp.attachment("https://wordpress.org/plugins/about/readme.txt") @@ -43,34 +48,38 @@ def test_attachment_url(): pass +@pytest.mark.file_operation def test_attachment_not_accepted(): with pytest.raises(ValueError) as excinfo: imp.attachment("./tests/data_files/test.mp3") assert str(excinfo.value) == 'Unknown file type for test.mp3' +@pytest.mark.file_operation def test_reader(vendor_raw_xls_fields): readxls = imp.reader('./tests/data_files/Vendor.xls') for n, row in enumerate(readxls): assert row == 
vendor_raw_xls_fields[n] +@pytest.mark.file_operation def test_reader_with_sheetname(vendor_raw_xls_fields): readxls = imp.reader('./tests/data_files/Vendor.xls', 'Vendor') for n, row in enumerate(readxls): assert row == vendor_raw_xls_fields[n] +@pytest.mark.file_operation def test_reader_wrong_sheetname(): readxls = imp.reader('./tests/data_files/Vendor.xls', 'Enzyme') list_readxls = list(readxls) assert list_readxls == [] +@pytest.mark.file_operation def test_cell_value(): readxls = imp.reader('./tests/data_files/test_cell_values.xls') list_readxls = list(readxls) - print(list_readxls) assert list_readxls == [['BOOLEAN', '1'], ['NUMBER', '10'], ['DATE', '2016-09-02']] @@ -89,6 +98,16 @@ def test_formatter_gets_lists_correctly(): assert ['1', '2', '3'] == imp.data_formatter("'[1,2,3]'", 'array') +def test_build_field_empty_is_skipped(): + assert imp.build_field('some_field', '', 'string') is None + assert imp.build_field('', 'some_data', 'string') is None + + +def test_build_field_old_stype_field(): + old_style = imp.build_field('some_field:int', "5", None) + assert old_style == {'some_field': 5} + + def test_build_patch_json_removes_empty_fields(file_metadata, file_metadata_type): post_json = imp.build_patch_json(file_metadata, file_metadata_type) @@ -131,9 +150,150 @@ def test_get_fields_type(): def test_get_existing_uuid(connection, mocker, returned_vendor_existing_item): post_jsons = [{'uuid': 'some_uuid'}, {'accession': 'some_accession'}, - {'aliases': ['some_uuid']}, + {'aliases': ['some_acc']}, {'@id': 'some_@id'}] for post_json in post_jsons: with mocker.patch('wranglertools.fdnDCIC.requests.get', return_value=returned_vendor_existing_item): response = imp.get_existing(post_json, connection) assert response == returned_vendor_existing_item.json() + + +@pytest.mark.file_operation +def test_excel_reader_no_update_no_patchall_new_doc_with_attachment(capsys, mocker, connection): + # test new item submission without patchall update tags and check the return 
message + test_insert = './tests/data_files/Document_insert.xls' + dict_load = {} + with mocker.patch('wranglertools.import_data.get_existing', return_value={}): + imp.excel_reader(test_insert, 'Document', False, connection, False, dict_load) + args = imp.get_existing.call_args + attach = args[0][0]['attachment'] + assert attach['href'].startswith('data:image/jpeg;base64') + + +@pytest.mark.file_operation +def test_excel_reader_no_update_no_patchall_new_item(capsys, mocker, connection): + # test new item submission without patchall update tags and check the return message + test_insert = './tests/data_files/Vendor_insert.xls' + dict_load = {} + message = "This looks like a new row but the update flag wasn't passed, use --update to post new data" + post_json = {'lab': 'sample-lab', + 'description': 'Sample description', + 'award': 'SampleAward', + 'title': 'Sample Vendor', + 'url': 'https://www.sample_vendor.com/', + 'aliases': ['dcic:sample_vendor']} + with mocker.patch('wranglertools.import_data.get_existing', return_value={}): + imp.excel_reader(test_insert, 'Vendor', False, connection, False, dict_load) + args = imp.get_existing.call_args + assert args[0][0] == post_json + out, err = capsys.readouterr() + assert out.strip() == message + + +@pytest.mark.file_operation +def test_excel_reader_no_update_no_patchall_existing_item(capsys, mocker, connection): + # test exisiting item submission without patchall update tags and check the return message + test_insert = "./tests/data_files/Vendor_insert.xls" + dict_load = {} + message = "VENDOR: 0 out of 1 posted, 0 errors, 0 patched, 1 not patched (use --patchall to patch)." 
+ post_json = {'lab': 'sample-lab', + 'description': 'Sample description', + 'award': 'SampleAward', + 'title': 'Sample Vendor', + 'url': 'https://www.sample_vendor.com/', + 'aliases': ['dcic:sample_vendor']} + existing_vendor = {'uuid': 'sample_uuid'} + with mocker.patch('wranglertools.import_data.get_existing', return_value=existing_vendor): + imp.excel_reader(test_insert, 'Vendor', False, connection, False, dict_load) + args = imp.get_existing.call_args + assert args[0][0] == post_json + out, err = capsys.readouterr() + assert out.strip() == message + + +@pytest.mark.file_operation +def test_excel_reader_no_update_no_patchall_new_experiment_expset_combined(mocker, connection): + # check if the separated exp set fields in experiments get combined. + test_insert = './tests/data_files/Exp_HiC_insert.xls' + dict_load = {} + post_json = {'experiment_sets': ['a', 'b', 'c', 'd'], 'aliases': ['dcic:test'], 'award': 'test-award', + 'experiment_type': 'in situ Hi-C', 'lab': 'test-lab', 'filename': 'example.fastq.gz', + 'biosample': 'test-biosample'} + with mocker.patch('wranglertools.import_data.get_existing', return_value={}): + imp.excel_reader(test_insert, 'ExperimentHiC', False, connection, False, dict_load) + args = imp.get_existing.call_args + assert args[0][0] == post_json + + +@pytest.mark.file_operation +def test_excel_reader_update_new_experiment_post_and_file_upload(capsys, mocker, connection): + # check if the separated exp set fields in experiments get combined + test_insert = './tests/data_files/Exp_HiC_insert.xls' + dict_load = {} + message0 = "calculating md5 sum for file ./tests/data_files/example.fastq.gz" + message1 = "EXPERIMENTHIC: 1 out of 1 posted, 0 errors, 0 patched." 
+ e = {'status': 'success', '@graph': [{'uuid': 'some_uuid'}]} + # mock fetching existing info, return None + with mocker.patch('wranglertools.import_data.get_existing', return_value={}): + # mock upload file and skip + with mocker.patch('wranglertools.import_data.upload_file', return_value={}): + # mock posting new items + with mocker.patch('wranglertools.fdnDCIC.new_FDN', return_value=e): + imp.excel_reader(test_insert, 'ExperimentHiC', True, connection, False, dict_load) + args = imp.fdnDCIC.new_FDN.call_args + out, err = capsys.readouterr() + outlist = [i.strip() for i in out.split('\n') if i is not ""] + post_json_arg = args[0][2] + assert post_json_arg['md5sum'] == '8f8cc612e5b2d25c52b1d29017e38f2b' + assert message0 == outlist[0] + assert message1 == outlist[1] + + +@pytest.mark.file_operation +def test_excel_reader_patch_experiment_post_and_file_upload(capsys, mocker, connection): + # check if the separated exp set fields in experiments get combined + test_insert = './tests/data_files/Exp_HiC_insert.xls' + dict_load = {} + message0 = "calculating md5 sum for file ./tests/data_files/example.fastq.gz" + message1 = "EXPERIMENTHIC: 1 out of 1 posted, 0 errors, 1 patched." 
+ existing_exp = {'uuid': 'sample_uuid'} + e = {'status': 'success', + '@graph': [{'uuid': 'some_uuid', + 'upload_credentials': 'old_creds', + 'accession': 'some_accession'}]} + # mock fetching existing info, return None + with mocker.patch('wranglertools.import_data.get_existing', return_value=existing_exp): + # mock upload file and skip + with mocker.patch('wranglertools.import_data.upload_file', return_value={}): + # mock posting new items + with mocker.patch('wranglertools.fdnDCIC.patch_FDN', return_value=e): + # mock get upload creds + with mocker.patch('wranglertools.import_data.get_upload_creds', return_value="new_creds"): + imp.excel_reader(test_insert, 'ExperimentHiC', False, connection, True, dict_load) + # check for md5sum + args = imp.fdnDCIC.patch_FDN.call_args + post_json_arg = args[0][2] + assert post_json_arg['md5sum'] == '8f8cc612e5b2d25c52b1d29017e38f2b' + # check for cred getting updated (from old_creds to new_creds) + args_upload = imp.upload_file.call_args + updated_post = args_upload[0][0] + assert updated_post['@graph'][0]['upload_credentials'] == 'new_creds' + # check for output message + out, err = capsys.readouterr() + outlist = [i.strip() for i in out.split('\n') if i is not ""] + assert message0 == outlist[0] + assert message1 == outlist[1] + + +def test_order_sorter(capsys): + test_list = ["ExperimentHiC", "BiosampleCellCulture", "Biosource", "Document", "Modification", + "IndividualMouse", "Biosample", "Lab", "User", "Trouble"] + ordered_list = ['User', 'Lab', 'Document', 'IndividualMouse', 'Biosource', 'Modification', + 'BiosampleCellCulture', 'Biosample', 'ExperimentHiC'] + message0 = "WARNING! Trouble sheet(s) are not loaded" + message1 = '''WARNING! 
Check the sheet names and the reference list "sheet_order"''' + assert ordered_list == imp.order_sorter(test_list) + out, err = capsys.readouterr() + outlist = [i.strip() for i in out.split('\n') if i is not ""] + assert message0 == outlist[0] + assert message1 == outlist[1] diff --git a/wranglertools/fdnDCIC.py b/wranglertools/fdnDCIC.py index cde69061..6f509288 100644 --- a/wranglertools/fdnDCIC.py +++ b/wranglertools/fdnDCIC.py @@ -254,14 +254,18 @@ def fetch_all_items(sheet, field_list, connection): field = field.replace("|3", "") if field == "#Field Name:": item_info.append("#") - # the attachment fields returns a dictionary + # the attachment field returns a dictionary elif field == "attachment": try: item_info.append(item.get(field)['download']) except: item_info.append("") else: - item_info.append(item.get(field, '')) + # when writing values, check for the lists and turn them into string + write_value = item.get(field, '') + if isinstance(write_value, list): + write_value = ','.join(write_value) + item_info.append(write_value) all_items.append(item_info) return all_items else: diff --git a/wranglertools/import_data.py b/wranglertools/import_data.py index c6cc9204..02ad87b9 100755 --- a/wranglertools/import_data.py +++ b/wranglertools/import_data.py @@ -200,6 +200,7 @@ def data_formatter(value, val_type): def get_field_name(field_name): """handle type at end, plus embedded objets.""" field = field_name.replace('*', '') + field = field.split(':')[0] return field.split(".")[0] @@ -242,13 +243,11 @@ class FieldInfo(object): def build_field(field, field_data, field_type): - if field_data == '' or field == '': + if not field_data or not field: return None - patch_field_name = get_field_name(field) - if field_type is None: + if not field_type: field_type = get_field_type(field) - if is_embedded_field(field): sub_field = get_sub_field(field) return build_field(sub_field, field_data, 'string') @@ -313,11 +312,13 @@ def excel_reader(datafile, sheet, update, 
connection, patchall, dict_patch_loadx fields2types = None keys.pop(0) row2name = types.pop(0) + if 'Type' in row2name: fields2types = dict(zip(keys, types)) for field, ftype in fields2types.items(): if 'array' in ftype: fields2types[field] = 'array' + # print(fields2types) # sys.exit() total = 0 @@ -344,7 +345,7 @@ def excel_reader(datafile, sheet, update, connection, patchall, dict_patch_loadx for set_key in ["experiment_sets|0", "experiment_sets|1", "experiment_sets|2", "experiment_sets|3"]: try: comb_sets.extend(post_json.get(set_key)) - except: + except: # pragma: no cover continue post_json.pop(set_key, None) post_json['experiment_sets'] = comb_sets @@ -373,7 +374,6 @@ def excel_reader(datafile, sheet, update, connection, patchall, dict_patch_loadx if existing_data.get("uuid"): if not patchall: not_patched += 1 - if patchall: # add the md5 if file_to_upload and not post_json.get('md5sum'): @@ -383,16 +383,11 @@ def excel_reader(datafile, sheet, update, connection, patchall, dict_patch_loadx e = fdnDCIC.patch_FDN(existing_data["uuid"], connection, post_json) if file_to_upload: # get s3 credentials - creds = get_upload_creds( - e['@graph'][0]['accession'], - connection, - e['@graph'][0]) + creds = get_upload_creds(e['@graph'][0]['accession'], connection, e['@graph'][0]) e['@graph'][0]['upload_credentials'] = creds - # upload upload_file(e, filename_to_post) - - if e["status"] == "error": + if e["status"] == "error": # pragma: no cover error += 1 elif e["status"] == "success": success += 1 @@ -411,7 +406,7 @@ def excel_reader(datafile, sheet, update, connection, patchall, dict_patch_loadx if file_to_upload: # upload the file upload_file(e, filename_to_post) - if e["status"] == "error": + if e["status"] == "error": # pragma: no cover error += 1 elif e["status"] == "success": success += 1 @@ -453,12 +448,12 @@ def upload_file(metadata_post_response, path): #################### # POST file to S3 - env = os.environ.copy() + env = os.environ.copy() # pragma: no cover 
env.update({ 'AWS_ACCESS_KEY_ID': creds['access_key'], 'AWS_SECRET_ACCESS_KEY': creds['secret_key'], 'AWS_SECURITY_TOKEN': creds['session_token'], - }) + }) # pragma: no cover # ~10s/GB from Stanford - AWS Oregon # ~12-15s/GB from AWS Ireland - AWS Oregon