Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DataLake][Rename]Rename with Sas #12057

Merged
merged 8 commits into from
Sep 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

try:
from urllib.parse import quote, unquote
except ImportError:
from urllib2 import quote, unquote # type: ignore

from ._shared.base_client import parse_connection_str
from ._data_lake_file_client import DataLakeFileClient
from ._models import DirectoryProperties
Expand Down Expand Up @@ -300,16 +306,28 @@ def rename_directory(self, new_name, # type: str
"""
new_name = new_name.strip('/')
new_file_system = new_name.split('/')[0]
path = new_name[len(new_file_system):]
new_path_and_token = new_name[len(new_file_system):].split('?')
new_path = new_path_and_token[0]
try:
new_dir_sas = new_path_and_token[1] or self._query_str.strip('?')
except IndexError:
if not self._raw_credential and new_file_system != self.file_system_name:
raise ValueError("please provide the sas token for the new file")
if not self._raw_credential and new_file_system == self.file_system_name:
new_dir_sas = self._query_str.strip('?')

new_directory_client = DataLakeDirectoryClient(
self.url, new_file_system, directory_name=path, credential=self._raw_credential,
"{}://{}".format(self.scheme, self.primary_hostname), new_file_system, directory_name=new_path,
credential=self._raw_credential or new_dir_sas,
_hosts=self._hosts, _configuration=self._config, _pipeline=self._pipeline,
require_encryption=self.require_encryption,
key_encryption_key=self.key_encryption_key,
key_resolver_function=self.key_resolver_function)
new_directory_client._rename_path('/'+self.file_system_name+'/'+self.path_name, # pylint: disable=protected-access
**kwargs)
new_directory_client._rename_path( # pylint: disable=protected-access
'/{}/{}{}'.format(quote(unquote(self.file_system_name)),
quote(unquote(self.path_name)),
self._query_str),
**kwargs)
return new_directory_client

def create_sub_directory(self, sub_directory, # type: Union[DirectoryProperties, str]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
# license information.
# --------------------------------------------------------------------------
from io import BytesIO

try:
from urllib.parse import quote, unquote
except ImportError:
from urllib2 import quote, unquote # type: ignore

import six

from ._quick_query_helper import DataLakeFileQueryReader
Expand Down Expand Up @@ -631,21 +637,35 @@ def rename_file(self, new_name, # type: str
"""
new_name = new_name.strip('/')
new_file_system = new_name.split('/')[0]
path = new_name[len(new_file_system):]

new_directory_client = DataLakeFileClient(
self.url, new_file_system, file_path=path, credential=self._raw_credential,
new_path_and_token = new_name[len(new_file_system):].split('?')
new_path = new_path_and_token[0]
try:
new_file_sas = new_path_and_token[1] or self._query_str.strip('?')
except IndexError:
if not self._raw_credential and new_file_system != self.file_system_name:
raise ValueError("please provide the sas token for the new file")
if not self._raw_credential and new_file_system == self.file_system_name:
new_file_sas = self._query_str.strip('?')

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like there is unnecessary code duplication in _data_lake_file_client.py and _data_lake_directory_client.py. Maybe we should create a method in path_client that we can use to get new_file_sas, new_path_and_token, and new_path? What do you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let me take a look again...

new_file_client = DataLakeFileClient(
"{}://{}".format(self.scheme, self.primary_hostname), new_file_system, file_path=new_path,
credential=self._raw_credential or new_file_sas,
_hosts=self._hosts, _configuration=self._config, _pipeline=self._pipeline,
_location_mode=self._location_mode, require_encryption=self.require_encryption,
key_encryption_key=self.key_encryption_key,
key_resolver_function=self.key_resolver_function)
new_directory_client._rename_path('/'+self.file_system_name+'/'+self.path_name, # pylint: disable=protected-access
**kwargs)
return new_directory_client
key_resolver_function=self.key_resolver_function
)
new_file_client._rename_path( # pylint: disable=protected-access
'/{}/{}{}'.format(quote(unquote(self.file_system_name)),
quote(unquote(self.path_name)),
self._query_str),
**kwargs)
return new_file_client

def query_file(self, query_expression, **kwargs):
# type: (str, **Any) -> DataLakeFileQueryReader
"""Enables users to select/project on datalake file data by providing simple query expressions.
"""
Enables users to select/project on datalake file data by providing simple query expressions.
This operation returns a DataLakeFileQueryReader; users need to use readall() or readinto() to get query data.

:param str query_expression:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
# --------------------------------------------------------------------------

try:
from urllib.parse import urlparse, quote, unquote
from urllib.parse import urlparse, quote
except ImportError:
from urlparse import urlparse # type: ignore
from urllib2 import quote, unquote # type: ignore
from urllib2 import quote # type: ignore

import six

Expand Down Expand Up @@ -409,7 +409,7 @@ def _rename_path_options(self, rename_source, content_settings=None, metadata=No
path_http_headers = get_path_http_headers(content_settings)

options = {
'rename_source': quote(unquote(rename_source)),
'rename_source': rename_source,
'path_http_headers': path_http_headers,
'lease_access_conditions': access_conditions,
'source_lease_id': source_lease_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
# license information.
# --------------------------------------------------------------------------
# pylint: disable=invalid-overridden-method

try:
from urllib.parse import quote, unquote
except ImportError:
from urllib2 import quote, unquote # type: ignore
from ._data_lake_file_client_async import DataLakeFileClient
from .._data_lake_directory_client import DataLakeDirectoryClient as DataLakeDirectoryClientBase
from .._models import DirectoryProperties
Expand Down Expand Up @@ -270,16 +273,28 @@ async def rename_directory(self, new_name, # type: str
"""
new_name = new_name.strip('/')
new_file_system = new_name.split('/')[0]
path = new_name[len(new_file_system):]
new_path_and_token = new_name[len(new_file_system):].split('?')
new_path = new_path_and_token[0]
try:
new_dir_sas = new_path_and_token[1] or self._query_str.strip('?')
except IndexError:
if not self._raw_credential and new_file_system != self.file_system_name:
raise ValueError("please provide the sas token for the new directory")
if not self._raw_credential and new_file_system == self.file_system_name:
new_dir_sas = self._query_str.strip('?')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment above applies to the async client as well.


new_directory_client = DataLakeDirectoryClient(
self.url, new_file_system, directory_name=path, credential=self._raw_credential,
"{}://{}".format(self.scheme, self.primary_hostname), new_file_system, directory_name=new_path,
credential=self._raw_credential or new_dir_sas,
_hosts=self._hosts, _configuration=self._config, _pipeline=self._pipeline,
_location_mode=self._location_mode, require_encryption=self.require_encryption,
key_encryption_key=self.key_encryption_key,
key_resolver_function=self.key_resolver_function)
await new_directory_client._rename_path('/' + self.file_system_name + '/' + self.path_name, # pylint: disable=protected-access
**kwargs)
await new_directory_client._rename_path( # pylint: disable=protected-access
'/{}/{}{}'.format(quote(unquote(self.file_system_name)),
quote(unquote(self.path_name)),
self._query_str),
**kwargs)
return new_directory_client

async def create_sub_directory(self, sub_directory, # type: Union[DirectoryProperties, str]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
# --------------------------------------------------------------------------
# pylint: disable=invalid-overridden-method

try:
from urllib.parse import quote, unquote
except ImportError:
from urllib2 import quote, unquote # type: ignore

from ._download_async import StorageStreamDownloader
from ._path_client_async import PathClient
from .._data_lake_file_client import DataLakeFileClient as DataLakeFileClientBase
Expand Down Expand Up @@ -500,14 +505,26 @@ async def rename_file(self, new_name, # type: str
"""
new_name = new_name.strip('/')
new_file_system = new_name.split('/')[0]
path = new_name[len(new_file_system):]

new_directory_client = DataLakeFileClient(
self.url, new_file_system, file_path=path, credential=self._raw_credential,
new_path_and_token = new_name[len(new_file_system):].split('?')
new_path = new_path_and_token[0]
try:
new_file_sas = new_path_and_token[1] or self._query_str.strip('?')
except IndexError:
if not self._raw_credential and new_file_system != self.file_system_name:
raise ValueError("please provide the sas token for the new file")
if not self._raw_credential and new_file_system == self.file_system_name:
new_file_sas = self._query_str.strip('?')

new_file_client = DataLakeFileClient(
"{}://{}".format(self.scheme, self.primary_hostname), new_file_system, file_path=new_path,
credential=self._raw_credential or new_file_sas,
_hosts=self._hosts, _configuration=self._config, _pipeline=self._pipeline,
_location_mode=self._location_mode, require_encryption=self.require_encryption,
key_encryption_key=self.key_encryption_key,
key_resolver_function=self.key_resolver_function)
await new_directory_client._rename_path('/' + self.file_system_name + '/' + self.path_name, # pylint: disable=protected-access
**kwargs)
return new_directory_client
await new_file_client._rename_path( # pylint: disable=protected-access
'/{}/{}{}'.format(quote(unquote(self.file_system_name)),
quote(unquote(self.path_name)),
self._query_str),
**kwargs)
return new_file_client
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __init__(
**kwargs)

kwargs.pop('_hosts', None)
self._blob_client = BlobClient(self._blob_account_url, file_system_name, blob_name=path_name,
self._blob_client = BlobClient(self._blob_account_url, file_system_name, blob_name=self.path_name,
credential=credential, _hosts=self._blob_client._hosts, **kwargs) # type: ignore # pylint: disable=protected-access
self._client = DataLakeStorageClient(self.url, file_system_name, path_name, pipeline=self._pipeline)
self._loop = kwargs.get('loop', None)
Expand Down