Skip to content

Commit

Permalink
bugfix: 修复 file_overwrite 在 BKRepoStorage 中不生效的问题
Browse files Browse the repository at this point in the history
  • Loading branch information
shabbywu committed Sep 6, 2021
1 parent 4fe92ab commit c1b3d4f
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 67 deletions.
14 changes: 10 additions & 4 deletions sdks/bk-storages/bkstorages/backends/bkrepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@

import curlify
import requests
from bkstorages.utils import get_setting, setting
from django.core.exceptions import ImproperlyConfigured
from django.core.files.base import File
from django.core.files.storage import Storage
from django.utils.deconstruct import deconstructible
from requests.auth import HTTPBasicAuth
from six.moves.urllib_parse import urljoin

from bkstorages.utils import clean_name, get_available_overwrite_name, get_setting, safe_join, setting

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -319,9 +320,11 @@ def __init__(
)

def _full_path(self, name):
if name == '/':
name = ''
return urljoin(self.root_path, name).replace('\\', '/')
cleaned_name = clean_name(name)
# safe_join can not work with absolute path
if cleaned_name.startswith("/"):
cleaned_name = cleaned_name[1:]
return safe_join(self.root_path, cleaned_name)

def _open(self, name, mode='rb'):
return BKRepoFile(self._full_path(name), self)
Expand Down Expand Up @@ -364,6 +367,9 @@ def get_available_name(self, name, max_length=None):
Returns a filename that's free on the target storage system, and
available for new content to be written to.
"""
name = clean_name(name)
if self.file_overwrite:
return get_available_overwrite_name(name, max_length)
return super().get_available_name(name, max_length)

def generate_filename(self, filename):
Expand Down
70 changes: 12 additions & 58 deletions sdks/bk-storages/bkstorages/backends/rgw.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@
"""
import mimetypes
import os
import posixpath
from gzip import GzipFile
from tempfile import SpooledTemporaryFile

import botocore.utils
from bkstorages.utils import get_setting, setting
from boto3 import __version__ as boto3_version
from boto3 import resource
from botocore.client import Config
Expand All @@ -30,6 +28,8 @@
from six import BytesIO, string_types
from six.moves.urllib import parse as urlparse

from bkstorages.utils import clean_name, get_available_overwrite_name, get_setting, safe_join, setting

SAFE_CHARS = '-._~'


Expand Down Expand Up @@ -66,37 +66,6 @@ def percent_encode(input_str, safe=SAFE_CHARS, encoding=None):
)


def safe_join(base, *paths):
"""
A version of django.utils._os.safe_join for S3 paths.
Joins one or more path components to the base path component
intelligently. Returns a normalized version of the final path.
The final path must be located inside of the base path component
(otherwise a ValueError is raised).
Paths outside the base path indicate a possible security
sensitive operation.
"""
base_path = force_text(base)
base_path = base_path.rstrip('/')
paths = [force_text(p) for p in paths]

final_path = base_path
for path in paths:
final_path = urlparse.urljoin(final_path.rstrip('/') + "/", path)

# Ensure final_path starts with base_path and that the next character after
# the final path is '/' (or nothing, in which case final_path must be
# equal to base_path).
base_path_len = len(base_path)
if not final_path.startswith(base_path) or final_path[base_path_len : base_path_len + 1] not in ('', '/'):
raise ValueError('the joined path is located outside of the base path' ' component')

return final_path.lstrip('/')


@deconstructible
class RGWBoto3StorageFile(File):

Expand Down Expand Up @@ -424,21 +393,6 @@ def _get_or_create_bucket(self, name):
)
return bucket

def _clean_name(self, name):
"""
Cleans the name so that Windows style paths work
"""
# Normalize Windows style paths
clean_name = posixpath.normpath(name).replace('\\', '/')

# os.path.normpath() can strip trailing slashes so we implement
# a workaround here.
if name.endswith('/') and not clean_name.endswith('/'):
# Add a trailing slash as it was stripped.
return clean_name + '/'
else:
return clean_name

def _normalize_name(self, name):
"""
Normalizes the name so that paths like /path/to/ignored/../something.txt
Expand Down Expand Up @@ -471,7 +425,7 @@ def _compress_content(self, content):
return zbuf

def _open(self, name, mode='rb'):
name = self._normalize_name(self._clean_name(name))
name = self._normalize_name(clean_name(name))
try:
f = self.file_class(name, mode, self)
except self.connection_response_error as err:
Expand All @@ -481,7 +435,7 @@ def _open(self, name, mode='rb'):
return f

def _save(self, name, content):
cleaned_name = self._clean_name(name)
cleaned_name = clean_name(name)
name = self._normalize_name(cleaned_name)
parameters = self.object_parameters.copy()
content_type = getattr(content, 'content_type', mimetypes.guess_type(name)[0] or self.default_content_type)
Expand Down Expand Up @@ -516,7 +470,7 @@ def _save_content(self, obj, content, parameters):
obj.upload_fileobj(content, ExtraArgs=put_parameters)

def delete(self, name):
name = self._normalize_name(self._clean_name(name))
name = self._normalize_name(clean_name(name))
self.bucket.Object(self._encode_name(name)).delete()

def exists(self, name):
Expand All @@ -526,7 +480,7 @@ def exists(self, name):
return True
except ImproperlyConfigured:
return False
name = self._normalize_name(self._clean_name(name))
name = self._normalize_name(clean_name(name))
if self.entries:
return name in self.entries
obj = self.bucket.Object(self._encode_name(name))
Expand All @@ -537,7 +491,7 @@ def exists(self, name):
return False

def listdir(self, name):
name = self._normalize_name(self._clean_name(name))
name = self._normalize_name(clean_name(name))
# for the bucket.objects.filter and logic below name needs to end in /
# But for the root path "" we leave it as an empty string
if name and not name.endswith('/'):
Expand All @@ -558,7 +512,7 @@ def listdir(self, name):
return list(dirs), files

def size(self, name):
name = self._normalize_name(self._clean_name(name))
name = self._normalize_name(clean_name(name))
if self.entries:
entry = self.entries.get(name)
if entry:
Expand All @@ -571,7 +525,7 @@ def get_modified_time(self, name):
Returns an (aware) datetime object containing the last modified time if
USE_TZ is True, otherwise returns a naive datetime in the local timezone.
"""
name = self._normalize_name(self._clean_name(name))
name = self._normalize_name(clean_name(name))
entry = self.entries.get(name)
# only call self.bucket.Object() if the key is not found
# in the preloaded metadata.
Expand Down Expand Up @@ -623,7 +577,7 @@ def _strip_signing_parameters(self, url):
def url(self, name, parameters=None, expire=None):
# Preserve the trailing slash after normalizing the path.
# TODO: Handle force_http=not self.secure_urls like in s3boto
name = self._normalize_name(self._clean_name(name))
name = self._normalize_name(clean_name(name))
if self.custom_domain:
return "%s//%s/%s" % (self.url_protocol, self.custom_domain, filepath_to_uri(name))
if expire is None:
Expand All @@ -639,9 +593,9 @@ def url(self, name, parameters=None, expire=None):

def get_available_name(self, name, max_length=None):
"""Overwrite existing file with the same name."""
name = clean_name(name)
if self.file_overwrite:
name = self._clean_name(name)
return name
return get_available_overwrite_name(name, max_length)
return super(RGWBoto3Storage, self).get_available_name(name, max_length)


Expand Down
78 changes: 77 additions & 1 deletion sdks/bk-storages/bkstorages/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@
* specific language governing permissions and limitations under the License.
"""
import os
import posixpath

from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.core.exceptions import ImproperlyConfigured, SuspiciousFileOperation
from django.utils.encoding import force_text
from six.moves.urllib.parse import urljoin


def setting(name, default=None, strict=False):
Expand Down Expand Up @@ -52,3 +55,76 @@ def get_setting(names, allow_env=True):
if value is not None:
break
return value


def safe_join(base, *paths):
"""
A version of django.utils._os.safe_join for S3 paths.
Joins one or more path components to the base path component
intelligently. Returns a normalized version of the final path.
The final path must be located inside of the base path component
(otherwise a ValueError is raised).
Paths outside the base path indicate a possible security
sensitive operation.
"""
base_path = force_text(base)
base_path = base_path.rstrip('/')
paths = [force_text(p) for p in paths]

final_path = base_path
for path in paths:
final_path = urljoin(final_path.rstrip('/') + "/", path)

# Ensure final_path starts with base_path and that the next character after
# the final path is '/' (or nothing, in which case final_path must be
# equal to base_path).
base_path_len = len(base_path)
if not final_path.startswith(base_path) or final_path[base_path_len : base_path_len + 1] not in ('', '/'):
raise ValueError('the joined path is located outside of the base path' ' component')

return final_path.lstrip('/')


def clean_name(name):
"""
Cleans the name so that Windows style paths work
"""
# Normalize Windows style paths
clean_name = posixpath.normpath(name).replace('\\', '/')

# posixpath.normpath() can strip trailing slashes so we implement
# a workaround here.
if name.endswith('/') and not clean_name.endswith('/'):
# Add a trailing slash as it was stripped.
clean_name = clean_name + '/'

# Given an empty string, posixpath.normpath() will return ., which we don't want
if clean_name == '.' and name != ".":
clean_name = ''

return clean_name


def get_available_overwrite_name(name, max_length):
"""Return a filename that's meet the maximum length limit.
If the name is too long, this function will try to truncate those overflow.
"""
if max_length is None or len(name) <= max_length:
return name

# Adapted from Django
dir_name, file_name = os.path.split(name)
file_root, file_ext = os.path.splitext(file_name)
truncation = len(name) - max_length

file_root = file_root[:-truncation]
if not file_root:
raise SuspiciousFileOperation(
'Storage can not truncate away entire filename for "%s". '
'Please make sure that the corresponding file field '
'allows sufficient "max_length".' % name
)
return os.path.join(dir_name, "{}{}".format(file_root, file_ext))
2 changes: 1 addition & 1 deletion sdks/bk-storages/tests/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
"""
import os

from bkstorages.backends.rgw import RGWBoto3Storage
from django.db import models

from bkstorages.backends.rgw import RGWBoto3Storage

storage = RGWBoto3Storage()

Expand Down
2 changes: 1 addition & 1 deletion sdks/bk-storages/tests/test_bkrepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
import pytest
import requests
import requests_mock
from bkstorages.backends.bkrepo import BKGenericRepoClient, BKRepoFile, BKRepoStorage
from six.moves.urllib_parse import urljoin

from bkstorages.backends.bkrepo import BKGenericRepoClient, BKRepoFile, BKRepoStorage
from tests.utils import generate_random_string


Expand Down
5 changes: 3 additions & 2 deletions sdks/bk-storages/tests/test_rgw.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@

import pytest
import six
from bkstorages.backends.rgw import RGWBoto3Storage
from django.conf import settings
from django.core.files import File
from django.core.files.base import ContentFile
from django.conf import settings

from bkstorages.backends.rgw import RGWBoto3Storage

from .models import RGWFile

Expand Down

0 comments on commit c1b3d4f

Please sign in to comment.