bugfix: 修复 file_overwrite 在 BKRepoStorage 中不生效的问题

TencentBlueKing · Sep 6, 2021 · c1b3d4f · c1b3d4f
1 parent 4fe92ab
commit c1b3d4f
Show file tree

Hide file tree

Showing 6 changed files with 104 additions and 67 deletions.
diff --git a/sdks/bk-storages/bkstorages/backends/bkrepo.py b/sdks/bk-storages/bkstorages/backends/bkrepo.py
@@ -15,14 +15,15 @@
 
 import curlify
 import requests
-from bkstorages.utils import get_setting, setting
 from django.core.exceptions import ImproperlyConfigured
 from django.core.files.base import File
 from django.core.files.storage import Storage
 from django.utils.deconstruct import deconstructible
 from requests.auth import HTTPBasicAuth
 from six.moves.urllib_parse import urljoin
 
+from bkstorages.utils import clean_name, get_available_overwrite_name, get_setting, safe_join, setting
+
 logger = logging.getLogger(__name__)
 
 
@@ -319,9 +320,11 @@ def __init__(
         )
 
     def _full_path(self, name):
-        if name == '/':
-            name = ''
-        return urljoin(self.root_path, name).replace('\\', '/')
+        cleaned_name = clean_name(name)
+        # safe_join can not work with absolute path
+        if cleaned_name.startswith("/"):
+            cleaned_name = cleaned_name[1:]
+        return safe_join(self.root_path, cleaned_name)
 
     def _open(self, name, mode='rb'):
         return BKRepoFile(self._full_path(name), self)
@@ -364,6 +367,9 @@ def get_available_name(self, name, max_length=None):
         Returns a filename that's free on the target storage system, and
         available for new content to be written to.
         """
+        name = clean_name(name)
+        if self.file_overwrite:
+            return get_available_overwrite_name(name, max_length)
         return super().get_available_name(name, max_length)
 
     def generate_filename(self, filename):

diff --git a/sdks/bk-storages/bkstorages/backends/rgw.py b/sdks/bk-storages/bkstorages/backends/rgw.py
@@ -10,12 +10,10 @@
 """
 import mimetypes
 import os
-import posixpath
 from gzip import GzipFile
 from tempfile import SpooledTemporaryFile
 
 import botocore.utils
-from bkstorages.utils import get_setting, setting
 from boto3 import __version__ as boto3_version
 from boto3 import resource
 from botocore.client import Config
@@ -30,6 +28,8 @@
 from six import BytesIO, string_types
 from six.moves.urllib import parse as urlparse
 
+from bkstorages.utils import clean_name, get_available_overwrite_name, get_setting, safe_join, setting
+
 SAFE_CHARS = '-._~'
 
 
@@ -66,37 +66,6 @@ def percent_encode(input_str, safe=SAFE_CHARS, encoding=None):
     )
 
 
-def safe_join(base, *paths):
-    """
-    A version of django.utils._os.safe_join for S3 paths.
-
-    Joins one or more path components to the base path component
-    intelligently. Returns a normalized version of the final path.
-
-    The final path must be located inside of the base path component
-    (otherwise a ValueError is raised).
-
-    Paths outside the base path indicate a possible security
-    sensitive operation.
-    """
-    base_path = force_text(base)
-    base_path = base_path.rstrip('/')
-    paths = [force_text(p) for p in paths]
-
-    final_path = base_path
-    for path in paths:
-        final_path = urlparse.urljoin(final_path.rstrip('/') + "/", path)
-
-    # Ensure final_path starts with base_path and that the next character after
-    # the final path is '/' (or nothing, in which case final_path must be
-    # equal to base_path).
-    base_path_len = len(base_path)
-    if not final_path.startswith(base_path) or final_path[base_path_len : base_path_len + 1] not in ('', '/'):
-        raise ValueError('the joined path is located outside of the base path' ' component')
-
-    return final_path.lstrip('/')
-
-
 @deconstructible
 class RGWBoto3StorageFile(File):
 
@@ -424,21 +393,6 @@ def _get_or_create_bucket(self, name):
                     )
         return bucket
 
-    def _clean_name(self, name):
-        """
-        Cleans the name so that Windows style paths work
-        """
-        # Normalize Windows style paths
-        clean_name = posixpath.normpath(name).replace('\\', '/')
-
-        # os.path.normpath() can strip trailing slashes so we implement
-        # a workaround here.
-        if name.endswith('/') and not clean_name.endswith('/'):
-            # Add a trailing slash as it was stripped.
-            return clean_name + '/'
-        else:
-            return clean_name
-
     def _normalize_name(self, name):
         """
         Normalizes the name so that paths like /path/to/ignored/../something.txt
@@ -471,7 +425,7 @@ def _compress_content(self, content):
         return zbuf
 
     def _open(self, name, mode='rb'):
-        name = self._normalize_name(self._clean_name(name))
+        name = self._normalize_name(clean_name(name))
         try:
             f = self.file_class(name, mode, self)
         except self.connection_response_error as err:
@@ -481,7 +435,7 @@ def _open(self, name, mode='rb'):
         return f
 
     def _save(self, name, content):
-        cleaned_name = self._clean_name(name)
+        cleaned_name = clean_name(name)
         name = self._normalize_name(cleaned_name)
         parameters = self.object_parameters.copy()
         content_type = getattr(content, 'content_type', mimetypes.guess_type(name)[0] or self.default_content_type)
@@ -516,7 +470,7 @@ def _save_content(self, obj, content, parameters):
         obj.upload_fileobj(content, ExtraArgs=put_parameters)
 
     def delete(self, name):
-        name = self._normalize_name(self._clean_name(name))
+        name = self._normalize_name(clean_name(name))
         self.bucket.Object(self._encode_name(name)).delete()
 
     def exists(self, name):
@@ -526,7 +480,7 @@ def exists(self, name):
                 return True
             except ImproperlyConfigured:
                 return False
-        name = self._normalize_name(self._clean_name(name))
+        name = self._normalize_name(clean_name(name))
         if self.entries:
             return name in self.entries
         obj = self.bucket.Object(self._encode_name(name))
@@ -537,7 +491,7 @@ def exists(self, name):
             return False
 
     def listdir(self, name):
-        name = self._normalize_name(self._clean_name(name))
+        name = self._normalize_name(clean_name(name))
         # for the bucket.objects.filter and logic below name needs to end in /
         # But for the root path "" we leave it as an empty string
         if name and not name.endswith('/'):
@@ -558,7 +512,7 @@ def listdir(self, name):
         return list(dirs), files
 
     def size(self, name):
-        name = self._normalize_name(self._clean_name(name))
+        name = self._normalize_name(clean_name(name))
         if self.entries:
             entry = self.entries.get(name)
             if entry:
@@ -571,7 +525,7 @@ def get_modified_time(self, name):
         Returns an (aware) datetime object containing the last modified time if
         USE_TZ is True, otherwise returns a naive datetime in the local timezone.
         """
-        name = self._normalize_name(self._clean_name(name))
+        name = self._normalize_name(clean_name(name))
         entry = self.entries.get(name)
         # only call self.bucket.Object() if the key is not found
         # in the preloaded metadata.
@@ -623,7 +577,7 @@ def _strip_signing_parameters(self, url):
     def url(self, name, parameters=None, expire=None):
         # Preserve the trailing slash after normalizing the path.
         # TODO: Handle force_http=not self.secure_urls like in s3boto
-        name = self._normalize_name(self._clean_name(name))
+        name = self._normalize_name(clean_name(name))
         if self.custom_domain:
             return "%s//%s/%s" % (self.url_protocol, self.custom_domain, filepath_to_uri(name))
         if expire is None:
@@ -639,9 +593,9 @@ def url(self, name, parameters=None, expire=None):
 
     def get_available_name(self, name, max_length=None):
         """Overwrite existing file with the same name."""
+        name = clean_name(name)
         if self.file_overwrite:
-            name = self._clean_name(name)
-            return name
+            return get_available_overwrite_name(name, max_length)
         return super(RGWBoto3Storage, self).get_available_name(name, max_length)
 
 

diff --git a/sdks/bk-storages/bkstorages/utils.py b/sdks/bk-storages/bkstorages/utils.py
@@ -9,9 +9,12 @@
  * specific language governing permissions and limitations under the License.
 """
 import os
+import posixpath
 
 from django.conf import settings
-from django.core.exceptions import ImproperlyConfigured
+from django.core.exceptions import ImproperlyConfigured, SuspiciousFileOperation
+from django.utils.encoding import force_text
+from six.moves.urllib.parse import urljoin
 
 
 def setting(name, default=None, strict=False):
@@ -52,3 +55,76 @@ def get_setting(names, allow_env=True):
             if value is not None:
                 break
     return value
+
+
+def safe_join(base, *paths):
+    """
+    A version of django.utils._os.safe_join for S3 paths.
+
+    Joins one or more path components to the base path component
+    intelligently. Returns a normalized version of the final path.
+
+    The final path must be located inside of the base path component
+    (otherwise a ValueError is raised).
+
+    Paths outside the base path indicate a possible security
+    sensitive operation.
+    """
+    base_path = force_text(base)
+    base_path = base_path.rstrip('/')
+    paths = [force_text(p) for p in paths]
+
+    final_path = base_path
+    for path in paths:
+        final_path = urljoin(final_path.rstrip('/') + "/", path)
+
+    # Ensure final_path starts with base_path and that the next character after
+    # the final path is '/' (or nothing, in which case final_path must be
+    # equal to base_path).
+    base_path_len = len(base_path)
+    if not final_path.startswith(base_path) or final_path[base_path_len : base_path_len + 1] not in ('', '/'):
+        raise ValueError('the joined path is located outside of the base path' ' component')
+
+    return final_path.lstrip('/')
+
+
+def clean_name(name):
+    """
+    Cleans the name so that Windows style paths work
+    """
+    # Normalize Windows style paths
+    clean_name = posixpath.normpath(name).replace('\\', '/')
+
+    # posixpath.normpath() can strip trailing slashes so we implement
+    # a workaround here.
+    if name.endswith('/') and not clean_name.endswith('/'):
+        # Add a trailing slash as it was stripped.
+        clean_name = clean_name + '/'
+
+    # Given an empty string, posixpath.normpath() will return ., which we don't want
+    if clean_name == '.' and name != ".":
+        clean_name = ''
+
+    return clean_name
+
+
+def get_available_overwrite_name(name, max_length):
+    """Return a filename that's meet the maximum length limit.
+    If the name is too long, this function will try to truncate those overflow.
+    """
+    if max_length is None or len(name) <= max_length:
+        return name
+
+    # Adapted from Django
+    dir_name, file_name = os.path.split(name)
+    file_root, file_ext = os.path.splitext(file_name)
+    truncation = len(name) - max_length
+
+    file_root = file_root[:-truncation]
+    if not file_root:
+        raise SuspiciousFileOperation(
+            'Storage can not truncate away entire filename for "%s". '
+            'Please make sure that the corresponding file field '
+            'allows sufficient "max_length".' % name
+        )
+    return os.path.join(dir_name, "{}{}".format(file_root, file_ext))
diff --git a/sdks/bk-storages/tests/models.py b/sdks/bk-storages/tests/models.py
@@ -10,9 +10,9 @@
 """
 import os
 
-from bkstorages.backends.rgw import RGWBoto3Storage
 from django.db import models
 
+from bkstorages.backends.rgw import RGWBoto3Storage
 
 storage = RGWBoto3Storage()
 

diff --git a/sdks/bk-storages/tests/test_bkrepo.py b/sdks/bk-storages/tests/test_bkrepo.py
@@ -14,9 +14,9 @@
 import pytest
 import requests
 import requests_mock
-from bkstorages.backends.bkrepo import BKGenericRepoClient, BKRepoFile, BKRepoStorage
 from six.moves.urllib_parse import urljoin
 
+from bkstorages.backends.bkrepo import BKGenericRepoClient, BKRepoFile, BKRepoStorage
 from tests.utils import generate_random_string
 
 

diff --git a/sdks/bk-storages/tests/test_rgw.py b/sdks/bk-storages/tests/test_rgw.py
@@ -13,10 +13,11 @@
 
 import pytest
 import six
-from bkstorages.backends.rgw import RGWBoto3Storage
+from django.conf import settings
 from django.core.files import File
 from django.core.files.base import ContentFile
-from django.conf import settings
+
+from bkstorages.backends.rgw import RGWBoto3Storage
 
 from .models import RGWFile