Skip to content

Commit

Permalink
Merge fb6f1cc into 440efcf
Browse files Browse the repository at this point in the history
  • Loading branch information
andreacosolo committed Oct 6, 2022
2 parents 440efcf + fb6f1cc commit 70a1d65
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 8 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,19 @@ Submit4DN
Change Log
----------

3.1.0
=====

`PR 161 <https://github.com/4dn-dcic/Submit4DN/pull/161>`_

* Added documentation on how to install Submit4DN on Windows machines in
a virtual environment. A bug in either ``awscli`` or ``pyenv-win`` makes it
necessary to adjust the installation instructions for this use case (see
the troubleshooting section in ``README.md`` for details).

* Added support for ``~`` in paths for file and attachment upload.

* Bug fix: a ``show`` command was giving intermittent errors.

3.0.1
=====
Expand Down
20 changes: 19 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ This package is not supported on older Python versions and is supported and test

It is recommended to install this package in a virtual environment to avoid dependency clashes.

Problems have been reported on recent MacOS X versions having to do with the inability to find `libmagic`,
Problems have been reported on recent MacOS X and Windows versions having to do with the inability to find `libmagic`,
a C library to check file types that is used by the `python-magic` library.

eg. `ImportError: failed to find libmagic. Check your installation`
Expand All @@ -52,6 +52,24 @@ brew install libmagic
brew link libmagic (if the link already exists this command will fail; don't worry about that)
```

Additionally, problems have been reported on Windows when installing Submit4DN
inside a virtual environment, due to `aws` trying to use the global python instead
of the python inside the virtual environment.

The workaround, then, because it’s actually OK if `aws` doesn’t use the python
inside the virtual environment, is to just install `awscli` in the global
environment before entering the virtual environment. Or if you discover the
problem after you’re in, then go outside, install `awscli`, and re-enter the
virtual environment.

```
deactivate
pip install awscli
VENV\scripts\activate # replace VENV with your virtual environment name
aws --version # this is to test that awscli is now installed correctly
```


## Connecting to the Data Portal
To be able to use the provided tools, you need to generate an AccessKey on the [data portal](https://data.4dnucleome.org/).
If you do not yet have access, please contact [4DN Data Wranglers](mailto:support@4dnucleome.org)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "Submit4DN"
version = "3.0.1"
version = "3.1.0"
description = "Utility package for submitting data to the 4DN Data Portal"
authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
license = "MIT"
Expand Down
22 changes: 21 additions & 1 deletion tests/test_import_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,20 @@ def test_attachment_ftp_to_nowhere():
assert "urlopen error" in str(e.value)


def convert_to_path_with_tilde(string_path):
    '''Helper that rewrites a valid path so it starts with ~.

    Roughly the inverse of pathlib.Path.expanduser(): the path is made
    absolute and, when it lies inside the current user's home directory,
    that leading home prefix is replaced by '~'.

    string_path: path (relative or absolute) given as a string.

    Returns the '~'-prefixed path as a string, or the plain absolute path
    when it is not located under the home directory.
    '''
    absolute_path = pp.Path(string_path).resolve()
    home = pp.Path.home()
    # resolve() follows symlinks, so also try the resolved home directory;
    # otherwise a symlinked home would never match the resolved path.
    for candidate in (home, home.resolve()):
        try:
            # Match on whole leading path components only. A plain
            # str.replace() would also rewrite the home string when it
            # shows up in the middle of an unrelated path.
            relative = absolute_path.relative_to(candidate)
        except ValueError:
            continue
        return str(pp.Path('~') / relative)
    return str(absolute_path)


@pytest.mark.file_operation
def test_md5():
    '''Check the md5 checksum of a known data file given via a ~ path.'''
    # The helper rewrites the path with a leading '~' (when the checkout is
    # under the home directory), so imp.md5 has to cope with that form.
    path = convert_to_path_with_tilde('./tests/data_files/keypairs.json')
    md5_keypairs = imp.md5(path)
    assert md5_keypairs == "19d43267b642fe1868e3c136a2ee06f2"


Expand All @@ -40,6 +51,15 @@ def test_attachment_pdf():
assert attach['href'].startswith('data:application/pdf;base64')


@pytest.mark.file_operation
def test_attachment_expanduser_path():
    '''Attachment creation for a PDF whose path is passed through
    convert_to_path_with_tilde first.'''
    tilde_path = convert_to_path_with_tilde("./tests/data_files/test.pdf")
    result = imp.attachment(tilde_path)
    # Same expectations as for a plain relative path to the PDF.
    assert result['download'] == 'test.pdf'
    assert result['type'] == 'application/pdf'
    assert result['href'].startswith('data:application/pdf;base64')


@pytest.mark.file_operation
def test_attachment_image_wrong_extension():
with pytest.raises(ValueError) as excinfo:
Expand Down
14 changes: 9 additions & 5 deletions wranglertools/import_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@ def getArgs(): # pragma: no cover
]


def md5(path):
def md5(path_string):
path = pp.Path(path_string).expanduser()
md5sum = hashlib.md5()
with open(path, 'rb') as f:
for chunk in iter(lambda: f.read(1024*1024), b''):
Expand Down Expand Up @@ -175,6 +176,8 @@ def attachment(path):
'image/tiff',
)
ftp_attach = False
if path.startswith('~'):
path = str(pp.Path(path).expanduser())
if not pp.Path(path).is_file():
# if the path does not exist, check if it works as a URL
if path.startswith("ftp://"): # grab the file from ftp
Expand Down Expand Up @@ -211,7 +214,7 @@ def attachment(path):
filename = pp.PurePath(path).name
guessed_mime = mimetypes.guess_type(path)[0]
detected_mime = magic.from_file(path, mime=True)
# NOTE: this whole guesssing and detecting bit falls apart for zip files which seems a bit dodgy
# NOTE: this whole guessing and detecting bit falls apart for zip files which seems a bit dodgy
# some .zip files are detected as generic application/octet-stream but don't see a good way to verify
# basically relying on extension with a little verification by magic for most file types
if guessed_mime not in ALLOWED_MIMES:
Expand Down Expand Up @@ -1451,8 +1454,9 @@ def upload_file(creds, path): # pragma: no cover
# ~12-15s/GB from AWS Ireland - AWS Oregon
print("Uploading file.")
start = time.time()
path_object = pp.Path(path).expanduser()
try:
source = path
source = path_object
target = creds['upload_url']
print("Going to upload {} to {}.".format(source, target))
command = ['aws', 's3', 'cp']
Expand All @@ -1466,7 +1470,7 @@ def upload_file(creds, path): # pragma: no cover
else:
end = time.time()
duration = end - start
show("Uploaded in %.2f seconds" % duration)
print("Uploaded in %.2f seconds" % duration)


def running_on_windows_native():
Expand Down Expand Up @@ -1524,7 +1528,7 @@ def _verify_and_return_item(item, connection):
def cabin_cross_check(connection, patchall, update, infile, remote, lab=None, award=None):
"""Set of check for connection, file, dryrun, and prompt."""
print("Running on: {server}".format(server=connection.key['server']))
# check input file (xls)
# check input file (xlsx)
if not pp.Path(infile).is_file():
print(f"File {infile} not found!")
sys.exit(1)
Expand Down

0 comments on commit 70a1d65

Please sign in to comment.