-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merging dependencies into master - merged into high-level-commands in…
… last pull request. (#12) * Installing the dependencies and file structure needed for high-level commands to actually put files into the cloud. * Removed tests because i/o args changed quite a bit. Adding tests at the end. * Made _mime_type function readable * Added replica argument to upload_to_cloud script * Changed default staging bucket to an environment variable. * Removed scripting capability of upload_to_cloud.py
- Loading branch information
Showing
6 changed files
with
76 additions
and
124 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[submodule "checksum_reader"] | ||
path = checksumming_io | ||
url = https://github.com/HumanCellAtlas/checksumming_io.git |
Submodule checksumming_io
added at
e20549
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../checksumming_io/checksumming_io |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#!/usr/bin/env python3.6 | ||
|
||
""" | ||
Run "pip install crcmod python-magic boto3" to install this script's dependencies. | ||
""" | ||
import argparse | ||
import logging | ||
import mimetypes | ||
import os | ||
import uuid | ||
|
||
import boto3 | ||
from boto3.s3.transfer import TransferConfig | ||
from io import open | ||
|
||
from .packages.checksumming_io import ChecksummingBufferedReader, S3Etag | ||
|
||
|
||
# Configure root logging at INFO level as a side effect of importing this
# module. basicConfig() does nothing if the root logger already has handlers.
logging.basicConfig(level=logging.INFO)
|
||
|
||
def encode_tags(tags):
    """
    Convert a mapping of tag names to tag values into a list of
    ``{"Key": ..., "Value": ...}`` dicts, one per entry, in the mapping's
    iteration order.

    :param tags: A dict (or other mapping with ``items()``) of tag name to tag value.
    :return: A list of dicts, each with exactly the keys ``Key`` and ``Value``.
    """
    return [dict(Key=k, Value=v) for k, v in tags.items()]
|
||
|
||
def _mime_type(filename): | ||
type_, encoding = mimetypes.guess_type(filename) | ||
if encoding: | ||
return encoding | ||
if type_: | ||
return type_ | ||
raise RuntimeError("Can't discern mime type") | ||
|
||
|
||
def upload_to_cloud(files, staging_bucket, replica):
    """
    Upload files to cloud.

    Each file is wrapped in a ChecksummingBufferedReader and uploaded to the
    staging bucket under a key of the form ``<uuid4>/<basename>``. After the
    upload, the reader's checksums and the file's content type are attached to
    the uploaded object as S3 object tags.

    :param files: A list of binary files to upload.
    :param staging_bucket: The aws bucket to upload the files to.
    :param replica: The cloud replica to write to. One of 'aws', 'gc', or 'azure'. No functionality now.
    :return: a list of each file's unique key name.
    :raises RuntimeError: if a file's content type cannot be guessed from its
        name; raised before that file is uploaded.
    """
    # Threshold and chunk size are both tied to S3Etag.etag_stride --
    # presumably so the multipart layout matches the locally computed
    # "s3_etag" checksum; confirm against checksumming_io.
    tx_cfg = TransferConfig(multipart_threshold=S3Etag.etag_stride,
                            multipart_chunksize=S3Etag.etag_stride)
    s3 = boto3.resource("s3")
    bucket = s3.Bucket(staging_bucket)
    key_names = []
    for raw_fh in files:
        with ChecksummingBufferedReader(raw_fh) as fh:
            file_name = fh.raw.name
            # Resolve the content type before uploading: _mime_type() raises
            # for unrecognised names, and failing after the upload would leave
            # an untagged object in the bucket whose key is never returned.
            content_type = _mime_type(file_name)

            key_name = "{}/{}".format(uuid.uuid4(), os.path.basename(file_name))
            bucket.upload_fileobj(fh, key_name, Config=tx_cfg)

            # Checksums are read only after the upload has consumed the reader.
            sums = fh.get_checksums()
            metadata = {
                "hca-dss-s3_etag": sums["s3_etag"],
                "hca-dss-sha1": sums["sha1"],
                "hca-dss-sha256": sums["sha256"],
                "hca-dss-crc32c": sums["crc32c"],
                "hca-dss-content-type": content_type,
            }

            s3.meta.client.put_object_tagging(Bucket=bucket.name,
                                              Key=key_name,
                                              Tagging=dict(TagSet=encode_tags(metadata)))
            key_names.append(key_name)

    return key_names
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters