Skip to content

Commit

Permalink
Merging dependencies into master - merged into high-level-commands in…
Browse files Browse the repository at this point in the history
… last pull request. (#12)

* Installing the dependencies and file structure needed for high-level commands to actually put files into the cloud.

* Removed the tests because the input/output arguments changed substantially; tests will be added back at the end.
* Made _mime_type function readable
* Added replica argument to upload_to_cloud script
* Changed default staging bucket to an environment variable.
* Removed scripting capability of upload_to_cloud.py
  • Loading branch information
Mackey22 committed Jul 7, 2017
1 parent 5492908 commit d754393
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 124 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "checksum_reader"]
path = checksumming_io
url = https://github.com/HumanCellAtlas/checksumming_io.git
1 change: 1 addition & 0 deletions checksumming_io
Submodule checksumming_io added at e20549
Empty file added hca/packages/__init__.py
Empty file.
1 change: 1 addition & 0 deletions hca/packages/checksumming_io
69 changes: 69 additions & 0 deletions hca/upload_to_cloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env python3.6

"""
Run "pip install crcmod python-magic boto3" to install this script's dependencies.
"""
import argparse
import logging
import mimetypes
import os
import uuid

import boto3
from boto3.s3.transfer import TransferConfig
from io import open

from .packages.checksumming_io import ChecksummingBufferedReader, S3Etag


# Runs at import time: configures the root logger at INFO level.
logging.basicConfig(level=logging.INFO)


def encode_tags(tags):
    """
    Turn a mapping of tag names to values into a list of Key/Value entries.

    :param tags: A dict mapping each tag name to its value.
    :return: A list with one ``{"Key": name, "Value": value}`` dict per entry,
        in the mapping's iteration order.
    """
    tag_set = []
    for name, value in tags.items():
        tag_set.append({"Key": name, "Value": value})
    return tag_set


def _mime_type(filename):
type_, encoding = mimetypes.guess_type(filename)
if encoding:
return encoding
if type_:
return type_
raise RuntimeError("Can't discern mime type")


def upload_to_cloud(files, staging_bucket, replica):
    """
    Upload files to the staging bucket and tag each object with its checksums.

    Each file is stored under a fresh ``<uuid4>/<basename>`` key, then tagged
    with its s3_etag, sha1, sha256 and crc32c checksums and its content type.

    :param files: A list of open binary file objects to upload. Each must
        expose a ``name`` attribute (via the reader's ``raw`` stream).
    :param staging_bucket: The aws bucket to upload the files to.
    :param replica: The cloud replica to write to. One of 'aws', 'gc', or
        'azure'. Currently unused by this function.
    :return: a list of each file's unique key name, in input order.
    :raises RuntimeError: if a file's content type cannot be determined. This
        is raised before that file is uploaded; files earlier in the list
        have already been uploaded and tagged.
    """
    # NOTE(review): threshold and chunk size are both pinned to
    # S3Etag.etag_stride, presumably so the multipart ETag S3 computes lines
    # up with the locally computed "s3_etag" checksum -- confirm.
    tx_cfg = TransferConfig(multipart_threshold=S3Etag.etag_stride,
                            multipart_chunksize=S3Etag.etag_stride)
    s3 = boto3.resource("s3")
    bucket = s3.Bucket(staging_bucket)
    key_names = []
    for raw_fh in files:
        with ChecksummingBufferedReader(raw_fh) as fh:
            source_name = fh.raw.name

            # Resolve the content type BEFORE uploading: _mime_type raises for
            # unrecognised names, and failing here avoids leaving an uploaded
            # but untagged object behind in the bucket.
            content_type = _mime_type(source_name)

            key_name = "{}/{}".format(uuid.uuid4(), os.path.basename(source_name))
            bucket.upload_fileobj(fh, key_name, Config=tx_cfg)

            # Checksums are requested only after the upload has consumed the
            # reader (presumably they are accumulated as data is read).
            sums = fh.get_checksums()
            metadata = {
                "hca-dss-s3_etag": sums["s3_etag"],
                "hca-dss-sha1": sums["sha1"],
                "hca-dss-sha256": sums["sha256"],
                "hca-dss-crc32c": sums["crc32c"],
                "hca-dss-content-type": content_type,
            }

            s3.meta.client.put_object_tagging(
                Bucket=bucket.name,
                Key=key_name,
                Tagging=dict(TagSet=encode_tags(metadata)),
            )
            key_names.append(key_name)

    return key_names
126 changes: 2 additions & 124 deletions test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,133 +31,11 @@ def test_make_name(self):

def test_index_parameters(self):
    """Check hca.parser.index_parameters against three endpoint specs.

    The same spec dict is reused throughout: first without a "parameters"
    entry, then with a single path parameter, then with a body parameter
    whose schema properties are expected back as "extras-<property>" entries.
    """
    bundles_schema = {
        "properties": {
            "bundles": {
                "items": {"$ref": "#/definitions/Bundle"},
                "type": "array",
            },
        },
        "required": ["bundles"],
        "type": "object",
    }
    spec = {
        "description": "Returns a list of bundles matching given criteria.\n",
        "responses": {
            "200": {"description": "OK", "schema": bundles_schema},
        },
        "summary": "Query bundles",
    }

    # Case 1: no "parameters" entry, so an empty index is expected.
    indexed = hca.parser.index_parameters(spec)
    self.assertEqual(indexed, {})

    # Case 2: one path parameter, expected back keyed by its name.
    spec["parameters"] = [
        {
            "description": "Bundle unique ID.",
            "in": "path",
            "name": "uuid",
            "required": True,
            "type": "string",
        },
    ]
    indexed = hca.parser.index_parameters(spec)
    expected_path_index = {
        "uuid": {
            "description": "Bundle unique ID.",
            "in": "path",
            "name": "uuid",
            "required": True,
            "type": "string",
        },
    }
    self.assertEqual(indexed, expected_path_index)

    # Case 3: a body parameter; each schema property is expected as its own
    # "extras-<property>" entry, marked required only when the schema lists it.
    extras_schema = {
        "properties": {
            "bundle_uuid": {
                "description": "A RFC4122-compliant ID.",
                "type": "string",
            },
            "timestamp": {
                "description": "Timestamp of file creation in RFC3339.",
                "format": "date-time",
                "type": "string",
            },
        },
        "required": ["bundle_uuid"],
        "type": "object",
    }
    spec["parameters"] = [
        {
            "in": "body",
            "name": "extras",
            "required": True,
            "schema": extras_schema,
        },
    ]
    indexed = hca.parser.index_parameters(spec)
    expected_body_index = {
        "extras-timestamp": {
            "description": "Timestamp of file creation in RFC3339.",
            "in": "body",
            "name": "extras-timestamp",
            "required": False,
            "type": "string",
            "format": "date-time",
        },
        "extras-bundle_uuid": {
            "description": "A RFC4122-compliant ID.",
            "in": "body",
            "name": "extras-bundle_uuid",
            "required": True,
            "type": "string",
        },
    }
    self.assertEqual(indexed, expected_body_index)

def test_parsing(self):
    """Check how the API parser handles put-files and get-bundles arguments.

    Runs, in order: put-files invocations that must parse to the same result,
    put-files invocations that must exit, and get-bundles invocations paired
    with their expected parse results.
    """
    cli = hca.define_api.API("url", "user")

    # put-files: the positional uuid may come before or after the options.
    put_files_result = {
        'extras_source_url': 'sljdf.com',
        'extras_bundle_uuid': 'asdf',
        'uuid': ['134'],
        'extras_creator_uid': 'sdf',
    }
    accepted_put_files = (
        ["put-files", "134", "--extras-bundle_uuid", "asdf",
         "--extras-creator_uid", "sdf", "--extras-source_url", "sljdf.com"],
        ["put-files", "--extras-bundle_uuid", "asdf",
         "--extras-creator_uid", "sdf", "--extras-source_url", "sljdf.com", "134"],
    )
    for argv in accepted_put_files:
        self.assertEqual(cli.parse_args(argv), put_files_result)

    # put-files: each of these must make the parser exit.
    rejected_put_files = (
        # --extras-bundle_uuid is not given at all.
        ["put-files", "--extras-creator_uid", "sdf",
         "--extras-source_url", "sljdf.com", "134"],
        # The positional "134" is not given.
        ["put-files", "--extras-bundle_uuid", "asdf",
         "--extras-creator_uid", "sdf", "--extras-source_url", "sljdf.com"],
        # --extras-bundle_uuid is given without a value.
        ["put-files", "--extras-bundle_uuid", "--extras-creator_uid", "sdf",
         "--extras-source_url", "sljdf.com", "134"],
    )
    for argv in rejected_put_files:
        self.assertRaises(SystemExit, cli.parse_args, argv)

    get_bundles_cases = (
        (["get-bundles"], {}),
        (["get-bundles", "uuid_arg"], {"uuid": "uuid_arg"}),
        (["get-bundles", "uuid_arg", "version_arg", "--replica", "rep"],
         {"uuid": "uuid_arg", "replica": "rep", "bundle_version": "version_arg"}),
        # Works for now but shouldn't in the future b/c --replica required
        # when uuid and version specified.
        (["get-bundles", "uuid_arg", "version_arg"],
         {"uuid": "uuid_arg", "bundle_version": "version_arg"}),
        # Works for now. --replica isn't an option unless both uuid and
        # version specified.
        (["get-bundles", "uuid_arg", "--replica", "rep"],
         {"uuid": "uuid_arg", "replica": "rep"}),
    )
    for argv, expected in get_bundles_cases:
        self.assertEqual(cli.parse_args(argv), expected)


if __name__ == '__main__':
Expand Down

0 comments on commit d754393

Please sign in to comment.