Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add gsheet #163

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Custom_scripts/
Data_Files/MicroscopyCalibration/Files/
.pytest_cache/
.python-version
wranglertools/.config/gspread/authorized_user.json
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there is nothing worth tracking in .config I would gitignore the whole thing just to be safe... You also may want to provide another means to specify these credentials since folks who install from PyPI won't have this path.


# PyCharm metadata
.idea/
238 changes: 148 additions & 90 deletions poetry.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "Submit4DN"
version = "3.1.1"
version = "3.2.0"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might consider bumping major version since you are removing FTP support.

description = "Utility package for submitting data to the 4DN Data Portal"
authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
license = "MIT"
Expand All @@ -16,6 +16,7 @@ python = ">=3.7.0,<3.10"
python-magic = ">=0.4.12"
attrs = ">=21.4"
openpyxl = "^3.0.9"
gspread = "^5.6.0"
dcicutils = ">=4.0"
# awscli is not directly imported but is required for aws cp operation
awscli = "^1.22.88"
Expand Down
100 changes: 100 additions & 0 deletions tests/conftest.py

Large diffs are not rendered by default.

207 changes: 195 additions & 12 deletions tests/test_get_field_info.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,30 @@
import wranglertools.get_field_info as gfi
from wranglertools.constants import CONFDIR, DEFAULT_KEYPAIR_FILE
import pytest
from operator import itemgetter
import openpyxl
from pathlib import Path
import os

# test data is in conftest.py


def test_gfi_get_args_required_default():
    """Parsing an empty argv must yield the documented default options."""
    expected_defaults = {
        'type': ['all'],
        'key': 'default',
        'keyfile': CONFDIR / DEFAULT_KEYPAIR_FILE,
        'debug': False,
        'nodesc': False,
        'comments': False,
        'noenums': False,
        'outfile': 'fields.xlsx',
        'noadmin': False
    }
    parsed = gfi.getArgs([])
    for option, expected in expected_defaults.items():
        assert getattr(parsed, option) == expected


keypairs = {
"default":
{
Expand All @@ -22,30 +40,143 @@ def mkey():
return gfi.FDN_Key(keypairs, "default")


def test_key_as_dict():
    """An FDN_Key built from an in-memory keypairs dict exposes string credentials.

    Note: the rendered diff interleaved the removed lines (`def test_key():`,
    `assert(key)`) with the added ones; this is the post-change version only.
    """
    key = gfi.FDN_Key(keypairs, "default")
    assert key
    assert isinstance(key.con_key["server"], str)
    assert isinstance(key.con_key['key'], str)
    assert isinstance(key.con_key['secret'], str)


@pytest.fixture
def keydirname():
    # String form of the directory that holds the test keypairs file.
    dirname = './tests/data_files/'
    return dirname


@pytest.fixture
def keydir(keydirname):
    # Path object wrapping the string test-data directory.
    return Path(keydirname)


@pytest.fixture
def keyfilename():
    # Name of the credentials file the code under test looks for.
    filename = 'keypairs.json'
    return filename


@pytest.fixture
def keypath(keydir, keyfilename):
    # Full Path to the test keypairs.json file.
    return keydir / keyfilename


@pytest.fixture
def missing_dirname():
    # String form of a directory that intentionally does not exist.
    dirname = './missing/keydir/'
    return dirname


@pytest.fixture
def missing_dir(missing_dirname):
    # Path object for the intentionally nonexistent directory.
    return Path(missing_dirname)


@pytest.mark.file_operation
def test_key_file(keypath):
    """An explicit keyfile path (as per the --keyfile option) yields string credentials."""
    key = gfi.FDN_Key(keypath, "default")
    assert key
    credentials = key.con_key
    for field in ("server", "key", "secret"):
        assert isinstance(credentials[field], str)


@pytest.mark.file_operation
def test_key_from_env(mocker, keydirname):
    """Directory holding keypairs.json is taken from an environment variable.

    os.environ.get is mocked to return the test-data directory; to hit this
    clause the expected default keypath must be passed to the constructor.
    """
    mocker.patch('wranglertools.get_field_info.os.environ.get', return_value=keydirname)
    key = gfi.FDN_Key(CONFDIR / DEFAULT_KEYPAIR_FILE, 'default')
    assert key
    for field in ("server", "key", "secret"):
        assert isinstance(key.con_key[field], str)


def test_key_from_env_set_wrong(mocker, capsys):
    """Environment variable points at a directory lacking 'keypairs.json' → error.

    os.environ.get is mocked; to hit this clause the expected default keypath
    must be passed to the constructor.
    """
    baddir = 'some/other/name/'
    mocker.patch('wranglertools.get_field_info.os.environ.get', return_value=baddir)
    key = gfi.FDN_Key(CONFDIR / DEFAULT_KEYPAIR_FILE, 'default')
    printed = capsys.readouterr()[0]
    assert key.error
    assert printed == f'\n{baddir} directory set as an env variable does not contain {DEFAULT_KEYPAIR_FILE}\n\n'


@pytest.mark.file_operation
def test_key_from_default_location(mocker, keydir, keydirname, keyfilename):
    """A bit wonky: "mock" the default location to be where the test file is
    stored by overriding the CONFDIR constant."""
    mocker.patch("wranglertools.get_field_info.CONFDIR", keydir)
    key = gfi.FDN_Key(keydirname + keyfilename, 'default')
    assert key
    for field in ("server", "key", "secret"):
        assert isinstance(key.con_key[field], str)


@pytest.mark.file_operation
def test_key_from_home_location(mocker, keydir, keydirname, keyfilename):
    """A bit wonky: "mock" the default location to be where the test file is
    stored by overriding the HOME constant.

    Note: the rendered diff interleaved the removed old ``test_key_file`` body
    here; this is the post-change version only.
    """
    mocker.patch("wranglertools.get_field_info.HOME", keydir)
    default_keypath = keydirname + keyfilename
    key = gfi.FDN_Key(default_keypath, 'default')
    assert key
    assert isinstance(key.con_key["server"], str)
    assert isinstance(key.con_key['key'], str)
    assert isinstance(key.con_key['secret'], str)


def test_key_default_file_missing(mocker, capsys, missing_dir, missing_dirname, keyfilename):
    """Mock the default keyfile location to a nonexistent directory so it's not found."""
    mocker.patch("wranglertools.get_field_info.CONFDIR", missing_dir)
    mocker.patch("wranglertools.get_field_info.HOME", missing_dir)
    mocker.patch('wranglertools.get_field_info.os.environ.get', return_value=None)
    key = gfi.FDN_Key(str(missing_dirname + keyfilename), 'default')
    printed = capsys.readouterr()[0]
    assert key.error
    assert printed == f"\nThe keyfile does not exist! Add keypairs.json to {missing_dir} or use the --keyfile option\n\n"


def test_key_no_keyfile(capsys):
    """A None keyfile should not be possible when running get_field_info, but is
    relevant if FDN_Key is used from another context/script."""
    gfi.FDN_Key(None, 'default')
    printed = capsys.readouterr()[0]
    assert printed == "keyfile parameter missing\n"


def test_key_error_wrong_format(capsys):
    """A keypairs arg that is neither a dict nor an existing path prints the
    keyfile-does-not-exist message.

    Note: the rendered diff left the removed old ``message`` assignment in
    place, creating a dead statement; only the new message is kept here.
    """
    gfi.FDN_Key([("key_name", "my_key")], "key_name")
    out = capsys.readouterr()[0]
    message = (f"The keyfile [('key_name', 'my_key')] does not exist\n"
               f"check the --keyfile path or add {DEFAULT_KEYPAIR_FILE} to {CONFDIR}")
    assert out.strip() == message


def test_key_error_bad_keyname(capsys):
    """Requesting a key name absent from the keypairs dict sets the error flag."""
    key = gfi.FDN_Key(keypairs, "nosuchkey")
    printed = capsys.readouterr()[0]
    assert key.error
    assert printed == "ERROR: No key with name 'nosuchkey' found - check your keypairs file\n"


def bad_connection_will_exit():
with pytest.raises(SystemExit) as excinfo:
keypairs = {
Expand Down Expand Up @@ -192,6 +323,20 @@ def test_connection_prompt_for_lab_award_multi_lab_award(
assert connection.award == chosenaward


def test_remove_all_from_types_multitypes(mocked_args_w_type):
    """'all' is dropped from args.type when other concrete types are present."""
    assert 'all' in mocked_args_w_type.type
    gfi._remove_all_from_types(mocked_args_w_type)
    remaining = mocked_args_w_type.type
    assert 'all' not in remaining
    assert 'FileFastq' in remaining


def test_remove_all_from_types_do_not_rm_when_only_type(mocked_args_w_type):
    # NOTE(review): this body is byte-for-byte identical to
    # test_remove_all_from_types_multitypes and contradicts the test name —
    # for the "only type" case args.type should presumably be just ['all']
    # and 'all' should NOT be removed. Looks like a copy-paste; needs a
    # fixture with type == ['all'] and matching assertions. TODO confirm.
    assert 'all' in mocked_args_w_type.type
    gfi._remove_all_from_types(mocked_args_w_type)
    assert 'all' not in mocked_args_w_type.type
    assert 'FileFastq' in mocked_args_w_type.type


def test_set_award_no_lab(mocker, mkey, returned_user_me_submit_for_one_lab,
returned_lab_w_one_award):
mocker.patch('dcicutils.ff_utils.get_metadata', side_effect=[
Expand Down Expand Up @@ -344,6 +489,28 @@ def test_get_uploadable_fields_mock(connection_mock, mocker, returned_vendor_sch
assert field.enum is not None


def test_get_uploadable_fields_experiment_added_fields(connection_mock, mocker, returned_experiment_hi_c_schema):
    """The ExperimentHiC sheet gains the replicate-set bookkeeping fields."""
    expected_extras = ['*replicate_set', '*bio_rep_no', '*tec_rep_no']
    mocker.patch('dcicutils.ff_utils.get_metadata', return_value=returned_experiment_hi_c_schema.json())
    mocker.patch('dcicutils.ff_utils.search_metadata', return_value=[
        {"title": "single cell Methyl Hi-C"}, {"title": "Methyl Hi-C"}, {"title": "Dilution Hi-C"},
        {"title": "DNase Hi-C"}, {"title": "Micro-C"}, {"title": "single cell Hi-C"}, {"title": "sci-Hi-C"},
        {"title": "TCC"}, {"title": "in situ Hi-C"}, {"title": "MC-Hi-C"}, {"title": "MC-3C"}, {"title": "sn-Hi-C"}])
    fields = gfi.get_uploadable_fields(connection_mock, ['ExperimentHiC'])['ExperimentHiC']
    matched = [field.name for field in fields if field.name in expected_extras]
    assert len(matched) == len(expected_extras)


def test_get_uploadable_fields_file_extra_files(connection_mock, mocker, returned_file_fastq_schema):
    """FileFastq gains the embedded extra_files sub-fields, typed 'array of embedded...'."""
    wanted = ['extra_files.file_format', 'extra_files.use_for']
    mocker.patch('dcicutils.ff_utils.get_metadata', return_value=returned_file_fastq_schema.json())
    mocker.patch('dcicutils.ff_utils.search_metadata', return_value=[{'file_format': 'fastq'}, {'file_format': 'tar'}])
    fields = gfi.get_uploadable_fields(connection_mock, ['FileFastq'])['FileFastq']
    extras = [field for field in fields if field.name in wanted]
    assert len(extras) == len(wanted)
    assert all(field.ftype.startswith('array of embedded') for field in extras)


def xls_to_list(xls_file, sheet):
"""To compare xls files to reference ones, return a sorted list of content."""
wb = openpyxl.load_workbook(xls_file)
Expand All @@ -367,12 +534,16 @@ def test_create_xlsx_default_options(connection_mock, mocker, returned_bcc_schem
'#Additional Info:', '#Description:', '#Field Name:', '#Field Type:', '*culture_start_date',
'-', '-', '-', '-', '-', '-',
'A short description of the cell culture procedure - eg. Details on culturing a preparation of K562 cells',
"Choices:['Yes', 'No']", "Choices:['cardiac muscle myoblast', 'cardiac muscle cell']", "Choices:['non synchronized', 'G1']",
"Choices:['Yes', 'No']", "Choices:['cardiac muscle myoblast', 'cardiac muscle cell']",
"Choices:['non synchronized', 'G1']",
'If a culture is synchronized the cell cycle stage from which the biosample used in an experiment is prepared',
'Item:OntologyTerm', 'Protocols including additional culture manipulations such as stem cell differentiation or cell cycle synchronization.',
'Item:OntologyTerm',
'Protocols including additional culture manipulations such as stem cell differentiation or'
' cell cycle synchronization.',
'Relevant for pluripotent and stem cell lines - set to Yes if cells have undergone in vitro differentiation',
'The resulting tissue or cell type for cells that have undergone differentiation.',
'Total number of culturing days since receiving original vial', 'YYYY-MM-DD format date for most recently thawed cell culture.',
'Total number of culturing days since receiving original vial',
'YYYY-MM-DD format date for most recently thawed cell culture.',
'array of Item:Protocol', 'culture_duration', 'culture_harvest_date', 'description', 'in_vitro_differentiated',
'integer', 'number', 'passage_number', 'protocols_additional', 'string', 'string', 'string', 'string', 'string',
'synchronization_stage', 'tissue'
Expand Down Expand Up @@ -411,7 +582,8 @@ def test_create_xlsx_non_defaults(connection_mock, mocker, returned_bcc_schema):
except OSError:
pass
mocker.patch('dcicutils.ff_utils.get_metadata', return_value=returned_bcc_schema.json())
field_dict = gfi.get_uploadable_fields(connection_mock, ['BiosampleCellCulture'], no_description=True, include_comments=True, no_enums=True)
field_dict = gfi.get_uploadable_fields(connection_mock, ['BiosampleCellCulture'], no_description=True,
include_comments=True, no_enums=True)
gfi.create_excel(field_dict, xls_file)
assert os.path.isfile(xls_file)
assert xls_to_list(xls_file, "BiosampleCellCulture") == EXPECTED
Expand Down Expand Up @@ -445,10 +617,21 @@ def test_create_xls_lookup_order(connection_mock, mocker, returned_vendor_schema
def test_get_sheet_names(capfd):
    """Sheet resolution: dependencies are added in order, unknown types are
    reported to stdout, and the result contains no duplicates.

    Note: the rendered diff interleaved the removed ``out, err = ...`` line
    with the added ``out, _ = ...``; only the new line is kept here.
    """
    input_list = ['hic', 'experi-ment_capture-c', 'TreatmentChemical', 'Biosample']
    result = gfi.get_sheet_names(input_list)
    out, _ = capfd.readouterr()
    assert result == [
        'Protocol', 'Publication', 'Image', 'Biosource', 'BiosampleCellCulture',
        'Biosample', 'FileFastq', 'ExperimentHiC', 'ExperimentCaptureC', 'ExperimentSetReplicate'
    ]
    assert len(result) == len(list(set(result)))
    assert 'No schema found for type TreatmentChemical' in out


def test_get_sheet_names_all():
    """Requesting 'All' yields every sheet in SHEET_ORDER except the two excluded ones."""
    from wranglertools.constants import SHEET_ORDER
    expected = SHEET_ORDER[:]
    for excluded in ('ExperimentMic_Path', 'OntologyTerm'):
        expected.remove(excluded)
    result = gfi.get_sheet_names(['All'])
    assert len(result) == len(expected)
    assert result == expected