Merge 8b3777d into 33d37ae
netsettler committed Jul 18, 2020
2 parents 33d37ae + 8b3777d commit ea10818
Showing 4 changed files with 51 additions and 41 deletions.
29 changes: 15 additions & 14 deletions dcicutils/s3_utils.py
@@ -17,13 +17,13 @@
 logger = logging.getLogger(__name__)


-class s3Utils(object):
+class s3Utils(object):  # NOQA - This class name violates style rules, but a lot of things might break if we change it.

     def __init__(self, outfile_bucket=None, sys_bucket=None, raw_file_bucket=None,
                  blob_bucket=None, env=None):
-        '''
+        """
         if we pass in env set the outfile and sys bucket from the environment
-        '''
+        """
         # avoid circular ref
         from .beanstalk_utils import get_beanstalk_real_url
         self.url = ''
@@ -101,10 +101,10 @@ def does_key_exist(self, key, bucket=None, print_error=True):

     def get_file_size(self, key, bucket=None, add_bytes=0, add_gb=0,
                       size_in_gb=False):
-        '''
+        """
         default returns file size in bytes,
         unless size_in_gb = True
-        '''
+        """
         meta = self.does_key_exist(key, bucket)
         if not meta:
             raise Exception("key not found")
@@ -120,15 +120,16 @@ def delete_key(self, key, bucket=None):
             bucket = self.outfile_bucket
         self.s3.delete_object(Bucket=bucket, Key=key)

-    def size(self, bucket):
+    @classmethod
+    def size(cls, bucket):
         sbuck = boto3.resource('s3').Bucket(bucket)
         # get only head of objects so we can count them
         return sum(1 for _ in sbuck.objects.all())

     def s3_put(self, obj, upload_key, acl=None):
-        '''
+        """
         try to guess content type
-        '''
+        """
         content_type = mimetypes.guess_type(upload_key)[0]
         if content_type is None:
             content_type = 'binary/octet-stream'
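
Since size is now a classmethod, a bucket's object count can be obtained without constructing a full s3Utils instance. A minimal usage sketch, assuming AWS credentials are configured; the bucket name is hypothetical:

    # Minimal usage sketch (hypothetical bucket name; requires AWS credentials).
    from dcicutils.s3_utils import s3Utils

    object_count = s3Utils.size('my-example-bucket')  # callable on the class, no instance needed
    print("objects in bucket:", object_count)
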
@@ -165,9 +166,9 @@ def s3_delete_dir(self, prefix):
         files = obj_list.get('Contents', [])

         # morph file list into format that boto3 wants
-        delete_keys = {'Objects': []}
-        delete_keys['Objects'] = [{'Key': k} for k in
-                                  [obj['Key'] for obj in files]]
+        delete_keys = {'Objects': [{'Key': k}
+                                   for k in [obj['Key']
+                                             for obj in files]]}

         # second query deletes all the files, NOTE: Max 1000 files
         if delete_keys['Objects']:
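
The reworked comprehension still builds the Delete payload that boto3's delete_objects expects, i.e. {'Objects': [{'Key': ...}, ...]}, subject to the 1000-key limit noted in the comment. A small sketch of that shape; the bucket name and keys are hypothetical:

    # Illustrative sketch only; bucket name and keys are hypothetical.
    import boto3

    files = [{'Key': '__test_data/extracted/a.txt'},
             {'Key': '__test_data/extracted/b.txt'}]  # as listed under 'Contents'
    delete_keys = {'Objects': [{'Key': obj['Key']} for obj in files]}
    if delete_keys['Objects']:  # delete_objects accepts at most 1000 keys per call
        boto3.client('s3').delete_objects(Bucket='my-example-bucket', Delete=delete_keys)
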
@@ -189,7 +190,7 @@ def read_s3_zipfile(self, s3key, files_to_extract):

     def unzip_s3_to_s3(self, zipped_s3key, dest_dir, acl=None, do_not_store=False):
         """stream the content of a zipped key on S3 to another location on S3.
-        if do_not_store=false, it saves the content and return it in the dictionary format
+        if do_not_store=false, it saves the content and returns it in the dictionary format
         (default)
         """

@@ -222,9 +223,9 @@ def unzip_s3_to_s3(self, zipped_s3key, dest_dir, acl=None, do_not_store=False):
                 s3_file_name = dest_dir + file_name
                 s3_key = "https://s3.amazonaws.com/%s/%s" % (self.outfile_bucket, s3_file_name)
                 # just perf optimization so we don't have to copy
-                # files twice that we want to further interogate
+                # files twice that we want to further interrogate
                 the_file = zipstream.open(file_name, 'r').read()
-                file_to_find = file_name.split('/')[-1]
+                file_to_find = os.path.basename(file_name)
                 if not do_not_store:
                     ret_files[file_to_find] = {'s3key': s3_key,
                                                'data': the_file}
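
For forward-slash S3 keys like these, os.path.basename gives the same result as splitting on '/' and taking the last element, just more readably. A quick check, using one of the report names from the tests:

    # Equivalence check for forward-slash separated S3 keys.
    import os

    key = '__test_data/extracted/fastqc_report.html'
    assert os.path.basename(key) == key.split('/')[-1] == 'fastqc_report.html'
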
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dcicutils"
version = "0.34.2"
version = "0.34.0"
 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
 authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
 license = "MIT"
7 changes: 4 additions & 3 deletions test/conftest.py
@@ -9,6 +9,8 @@
 INTEGRATED_ES = 'https://search-fourfront-mastertest-wusehbixktyxtbagz5wzefffp4.us-east-1.es.amazonaws.com'


+TEST_DIR = os.path.join(os.path.dirname(__file__))
+
 @pytest.fixture(scope='session')
 def basestring():
     try:
@@ -36,7 +38,6 @@ def integrated_ff():
                         'the homepage gave status of: %s' % (INTEGRATED_ENV, res.status_code))
     return integrated

-
 @pytest.fixture(scope='session')
 def integrated_s3_info():
     """
@@ -50,9 +51,9 @@ def integrated_s3_info():
     # for now, always upload these files
     s3Obj.s3.put_object(Bucket=s3Obj.outfile_bucket, Key=test_filename,
                         Body=str.encode('thisisatest'))
-    zip_path = os.path.join('test', 'data_files', zip_filename.split('/')[-1])
+    zip_path = os.path.join(TEST_DIR, 'data_files', os.path.basename(zip_filename))
     s3Obj.s3.upload_file(Filename=str(zip_path), Bucket=s3Obj.outfile_bucket, Key=zip_filename)
-    zip_path2 = os.path.join('test', 'data_files', zip_filename2.split('/')[-1])
+    zip_path2 = os.path.join(TEST_DIR, 'data_files', os.path.basename(zip_filename2))
     s3Obj.s3.upload_file(Filename=str(zip_path2), Bucket=s3Obj.outfile_bucket, Key=zip_filename2)

     return {'s3Obj': s3Obj, 'filename': test_filename, 'zip_filename': zip_filename,
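
Building the fixture paths from TEST_DIR keeps them valid no matter which directory pytest is invoked from. A small sketch of the resulting path; the checkout location and zip key below are hypothetical:

    # Sketch only; the checkout location and zip key are hypothetical.
    import os

    TEST_DIR = os.path.join(os.path.dirname(__file__))  # e.g. '/home/user/dcicutils/test'
    zip_filename = '__test_data/fastqc_report.zip'       # hypothetical S3 key
    zip_path = os.path.join(TEST_DIR, 'data_files', os.path.basename(zip_filename))
    # -> '/home/user/dcicutils/test/data_files/fastqc_report.zip', independent of os.getcwd()
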
54 changes: 31 additions & 23 deletions test/test_s3_utils.py
@@ -184,59 +184,67 @@ def test_read_s3_zip(integrated_s3_info):


 def test_unzip_s3_to_s3(integrated_s3_info):
-    '''test for unzip_s3_to_s3 with case where there is a basdir'''
+    """test for unzip_s3_to_s3 with case where there is a basdir"""

     prefix = '__test_data/extracted'
     filename = integrated_s3_info['zip_filename']
-    integrated_s3_info['s3Obj'].s3_delete_dir(prefix)
+    s3_connection = integrated_s3_info['s3Obj']
+
+    # start with a clean test space
+    s3_connection.s3_delete_dir(prefix)

     # ensure this thing was deleted
     # if no files there will be no Contents in response
-    objs = integrated_s3_info['s3Obj'].s3_read_dir(prefix)
-    assert [] == objs.get('Contents', [])
+    objs = s3_connection.s3_read_dir(prefix)
+    assert not objs.get('Contents')

     # now copy to that dir we just deleted
-    ret_files = integrated_s3_info['s3Obj'].unzip_s3_to_s3(filename, prefix)
+    ret_files = s3_connection.unzip_s3_to_s3(filename, prefix)
     assert ret_files['fastqc_report.html']['s3key'].startswith("https://s3.amazonaws.com")

-    objs = integrated_s3_info['s3Obj'].s3_read_dir(prefix)
-    assert objs.get('Contents', None)
+    objs = s3_connection.s3_read_dir(prefix)
+    assert objs.get('Contents')


 def test_unzip_s3_to_s3_2(integrated_s3_info):
-    '''test for unzip_s3_to_s3 with case where there is no basdir'''
+    """test for unzip_s3_to_s3 with case where there is no basdir"""

     prefix = '__test_data/extracted'
     filename = integrated_s3_info['zip_filename2']
-    integrated_s3_info['s3Obj'].s3_delete_dir(prefix)
+    s3_connection = integrated_s3_info['s3Obj']
+
+    s3_connection.s3_delete_dir(prefix)

     # ensure this thing was deleted
     # if no files there will be no Contents in response
-    objs = integrated_s3_info['s3Obj'].s3_read_dir(prefix)
-    assert [] == objs.get('Contents', [])
+    objs = s3_connection.s3_read_dir(prefix)
+    assert not objs.get('Contents')

     # now copy to that dir we just deleted
-    ret_files = integrated_s3_info['s3Obj'].unzip_s3_to_s3(filename, prefix)
+    ret_files = s3_connection.unzip_s3_to_s3(filename, prefix)
     assert ret_files['qc_report.html']['s3key'].startswith("https://s3.amazonaws.com")
     assert ret_files['qc_report.html']['s3key'].endswith("qc_report.html")

-    objs = integrated_s3_info['s3Obj'].s3_read_dir(prefix)
-    assert objs.get('Contents', None)
+    objs = s3_connection.s3_read_dir(prefix)
+    assert objs.get('Contents')


-def test_unzip_s3_to_s3(integrated_s3_info):
-    '''test for unzip_s3_to_s3 with case where there is a basdir and
-    do_not_store=True'''
+def test_unzip_s3_to_s3_do_not_store(integrated_s3_info):
+    """test for unzip_s3_to_s3 with case where there is a basdir and do_not_store=True"""
     prefix = '__test_data/extracted'
     filename = integrated_s3_info['zip_filename']
-    integrated_s3_info['s3Obj'].s3_delete_dir(prefix)
+    s3_connection = integrated_s3_info['s3Obj']
+
+    s3_connection.s3_delete_dir(prefix)

     # ensure this thing was deleted
     # if no files there will be no Contents in response
-    objs = integrated_s3_info['s3Obj'].s3_read_dir(prefix)
-    assert [] == objs.get('Contents', [])
+    objs = s3_connection.s3_read_dir(prefix)
+    assert not objs.get('Contents')

     # now copy to that dir we just deleted
-    ret_files = integrated_s3_info['s3Obj'].unzip_s3_to_s3(filename, prefix, do_not_store=True)
+    ret_files = s3_connection.unzip_s3_to_s3(filename, prefix, do_not_store=True)
     assert len(ret_files) == 0  # no returned content

-    objs = integrated_s3_info['s3Obj'].s3_read_dir(prefix)
-    assert objs.get('Contents', None)
+    objs = s3_connection.s3_read_dir(prefix)
+    assert objs.get('Contents')
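
These tests run against live S3, so they assume AWS credentials for the integrated test buckets are available. One way to run just this group of tests programmatically, using pytest's -k filter (a sketch, not part of the change):

    # Sketch: run only the unzip tests; assumes credentials for the integrated buckets.
    import pytest

    pytest.main(['test/test_s3_utils.py', '-k', 'unzip_s3_to_s3'])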
