From 9339b26031aab45226a40e9d846be97c9ce4a3d9 Mon Sep 17 00:00:00 2001
From: Akshat Harit
Date: Wed, 14 Oct 2020 23:20:00 -0700
Subject: [PATCH] Add more logging for zero byte reads

---
 HISTORY.rst                      |  4 ++++
 azure/datalake/store/__init__.py |  2 +-
 azure/datalake/store/core.py     | 14 ++++++++++----
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 769b561..b2a6be6 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -3,6 +3,10 @@
 Release History
 ===============
 
+0.0.51 (2020-10-15)
++++++++++++++++++++
+* Add more logging for zero byte reads to investigate root cause.
+
 0.0.50 (2020-09-10)
 +++++++++++++++++++
 * Fix bug with retrying for ADAL exception parsing.
diff --git a/azure/datalake/store/__init__.py b/azure/datalake/store/__init__.py
index 3a0adbc..33961e1 100644
--- a/azure/datalake/store/__init__.py
+++ b/azure/datalake/store/__init__.py
@@ -6,7 +6,7 @@
 # license information.
 # --------------------------------------------------------------------------
 
-__version__ = "0.0.50"
+__version__ = "0.0.51"
 
 from .core import AzureDLFileSystem
 from .multithread import ADLDownloader
diff --git a/azure/datalake/store/core.py b/azure/datalake/store/core.py
index 6a262e1..9427ae1 100644
--- a/azure/datalake/store/core.py
+++ b/azure/datalake/store/core.py
@@ -1135,7 +1135,9 @@ def _read_blocksize(self, offset=-1):
             self.end = self.size
             self.cache = b""
             return
-        if offset >= self.start and offset < self.end:
+        if self.start <= offset < self.end:
+            logger.info("Read offset {offset} is within cache {start}-{end}. "
+                        "Not going to server.".format(offset=offset, start=self.start, end=self.end))
             return
         if offset > self.size:
             raise ValueError('Read offset is outside the File')
@@ -1165,9 +1167,13 @@ def read(self, length=-1):
             if not data_read: # Check to catch possible server errors. Ideally shouldn't happen.
                 flag += 1
                 if flag >= 5:
-                    raise DatalakeIncompleteTransferException('Could not read data: {}. '
-                                                              'Repeated zero byte reads. '
-                                                              'Possible file corruption'.format(self.path))
+                    exception_string = "Current Location:{loc}, " \
+                                       "File Size:{size}, Cache Start:{start}, " \
+                                       "Cache End:{end}".format(loc=self.loc, size=self.size,
+                                                                start=self.start, end=self.end)
+                    raise DatalakeIncompleteTransferException('Could not read data: {path}. '
+                                                              'Repeated zero byte reads. Possible file corruption. File Details'
+                                                              '{details}'.format(path=self.path, details=exception_string))
             out += data_read
             self.loc += len(data_read)
             length -= len(data_read)
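
A minimal sketch of how the new diagnostics could be surfaced from client code (the credentials, store name, and file path below are placeholders, not part of this patch): enabling INFO-level logging on the azure.datalake.store loggers makes the new cache-hit message from _read_blocksize visible, and a read that repeatedly returns zero bytes now raises DatalakeIncompleteTransferException with the current location, file size, and cache bounds included in the message.

    import logging
    from azure.datalake.store import core, lib

    # Surface the INFO-level cache-hit message added in _read_blocksize.
    logging.basicConfig(level=logging.INFO)

    # Placeholder credentials and store name, for illustration only.
    token = lib.auth(tenant_id='TENANT_ID', client_id='CLIENT_ID',
                     client_secret='CLIENT_SECRET')
    adl = core.AzureDLFileSystem(token, store_name='STORE_NAME')

    # Reads served from the block cache now log the offset and cache range;
    # five consecutive zero-byte reads raise DatalakeIncompleteTransferException
    # whose message includes loc, size, and the cache start/end added above.
    with adl.open('/path/to/file.txt', 'rb') as f:
        data = f.read()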