Skip to content

Commit c93f4e2

Browse files
committed
Fixed a glitch in the chunked-download code.
The glitch could (and probably would) cause non-terminating loops. Also includes general refactors.
1 parent bd01c8a commit c93f4e2

File tree

7 files changed

+83
-82
lines changed

7 files changed

+83
-82
lines changed

gdrivefs/cache/volume.py

Lines changed: 33 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def path_resolver(path):
2424

2525
parent_clause = path_relations.get_clause_from_path(path)
2626
if not parent_clause:
27-
logging.debug("Path [%s] does not exist for split.", path)
27+
# logging.debug("Path [%s] does not exist for split.", path)
2828
raise GdNotFoundError()
2929

3030
return (parent_clause[CLAUSE_ENTRY], parent_clause)
@@ -93,12 +93,7 @@ def remove_entry_recursive(self, entry_id, is_update=False):
9393
else:
9494
stat_files += 1
9595

96-
try:
97-
result = self.__remove_entry(current_entry_id, is_update)
98-
except:
99-
self.__log.debug("Could not remove entry with ID [%s] "
100-
"(recursive).", current_entry_id)
101-
raise
96+
result = self.__remove_entry(current_entry_id, is_update)
10297

10398
removed[current_entry_id] = True
10499

@@ -356,8 +351,8 @@ def register_entry(self, normalized_entry):
356351

357352
entry_id = normalized_entry.id
358353

359-
self.__log.debug("Registering entry with ID [%s] within path-"
360-
"relations.", entry_id)
354+
# self.__log.debug("Registering entry with ID [%s] within path-"
355+
# "relations.", entry_id)
361356

362357
if self.is_cached(entry_id, include_placeholders=False):
363358
# self.__log.debug("Entry to register with ID [%s] already "
@@ -464,28 +459,28 @@ def register_entry(self, normalized_entry):
464459
return entry_clause
465460

466461
def __load_all_children(self, parent_id):
467-
self.__log.debug("Loading children under parent with ID [%s].",
468-
parent_id)
462+
# self.__log.debug("Loading children under parent with ID [%s].",
463+
# parent_id)
469464

470465
with PathRelations.rlock:
471466
children = drive_proxy('list_files', parent_id=parent_id)
472467

473468
child_ids = [ ]
474469
if children:
475-
self.__log.debug("(%d) children returned and will be "
476-
"registered.", len(children))
470+
# self.__log.debug("(%d) children returned and will be "
471+
# "registered.", len(children))
477472

478473
for child in children:
479474
self.register_entry(child)
480475

481-
self.__log.debug("Looking up parent with ID [%s] for all-"
482-
"children update.", parent_id)
476+
# self.__log.debug("Looking up parent with ID [%s] for all-"
477+
# "children update.", parent_id)
483478

484479
parent_clause = self.__get_entry_clause_by_id(parent_id)
485480

486481
parent_clause[4] = True
487482

488-
self.__log.debug("All children have been loaded.")
483+
# self.__log.debug("All children have been loaded.")
489484

490485
return children
491486

@@ -494,7 +489,7 @@ def get_children_from_entry_id(self, entry_id):
494489
entry-ID.
495490
"""
496491

497-
self.__log.debug("Getting children under entry with ID [%s].",entry_id)
492+
# self.__log.debug("Getting children under entry with ID [%s].",entry_id)
498493

499494
with PathRelations.rlock:
500495
entry_clause = self.__get_entry_clause_by_id(entry_id)
@@ -523,8 +518,8 @@ def get_children_from_entry_id(self, entry_id):
523518
self.__log.error(message)
524519
raise Exception(message)
525520

526-
self.__log.debug("(%d) children found.",
527-
len(entry_clause[CLAUSE_CHILDREN]))
521+
# self.__log.debug("(%d) children found.",
522+
# len(entry_clause[CLAUSE_CHILDREN]))
528523

529524
return entry_clause[CLAUSE_CHILDREN]
530525

@@ -540,7 +535,7 @@ def get_children_entries_from_entry_id(self, entry_id):
540535

541536
def get_clause_from_path(self, filepath):
542537

543-
self.__log.debug("Getting clause for path [%s].", filepath)
538+
# self.__log.debug("Getting clause for path [%s].", filepath)
544539

545540
with PathRelations.rlock:
546541
path_results = self.find_path_components_goandget(filepath)
@@ -550,7 +545,7 @@ def get_clause_from_path(self, filepath):
550545
return None
551546

552547
entry_id = path_results[0][-1]
553-
self.__log.debug("Found entry with ID [%s].", entry_id)
548+
# self.__log.debug("Found entry with ID [%s].", entry_id)
554549

555550
# Make sure the entry is more than a placeholder.
556551
self.__get_entry_clause_by_id(entry_id)
@@ -567,8 +562,8 @@ def find_path_components_goandget(self, path):
567562
previous_results = []
568563
i = 0
569564
while 1:
570-
self.__log.debug("Attempting to find path-components (go and "
571-
"get) for path [%s]. CYCLE= (%d)", path, i)
565+
# self.__log.debug("Attempting to find path-components (go and "
566+
# "get) for path [%s]. CYCLE= (%d)", path, i)
572567

573568
# See how many components can be found in our current cache.
574569

@@ -626,8 +621,9 @@ def find_path_components_goandget(self, path):
626621

627622
filenames_phrase = ', '.join([ candidate.id for candidate
628623
in children ])
629-
self.__log.debug("(%d) candidate children were found: %s",
630-
len(children), filenames_phrase)
624+
625+
# self.__log.debug("(%d) candidate children were found: %s",
626+
# len(children), filenames_phrase)
631627

632628
i += 1
633629

@@ -651,7 +647,7 @@ def __find_path_components(self, path):
651647
return self.path_cache[path]
652648

653649
with PathRelations.rlock:
654-
self.__log.debug("Locating entry information for path [%s].", path)
650+
# self.__log.debug("Locating entry information for path [%s].", path)
655651
root_id = AccountInfo.get_instance().root_id
656652

657653
# Ensure that the root node is loaded.
@@ -668,9 +664,9 @@ def __find_path_components(self, path):
668664
child_filename_to_search_fs = utility. \
669665
translate_filename_charset(path_parts[i])
670666

671-
self.__log.debug("Checking for part (%d) [%s] under parent "
672-
"with ID [%s].",
673-
i, child_filename_to_search_fs, entry_ptr)
667+
# self.__log.debug("Checking for part (%d) [%s] under parent "
668+
# "with ID [%s].",
669+
# i, child_filename_to_search_fs, entry_ptr)
674670

675671
current_clause = self.entry_ll[entry_ptr]
676672

@@ -752,20 +748,20 @@ def __get_entries_to_update(self, requested_entry_id):
752748
parent_ids = drive_proxy('get_parents_containing_id',
753749
child_id=requested_entry_id)
754750

755-
self.__log.debug("Found (%d) parents.", len(parent_ids))
751+
# self.__log.debug("Found (%d) parents.", len(parent_ids))
756752

757753
affected_entries = [ requested_entry_id ]
758754
considered_entries = { }
759755
max_readahead_entries = Conf.get('max_readahead_entries')
760756
for parent_id in parent_ids:
761-
self.__log.debug("Retrieving children for parent with ID [%s].",
762-
parent_id)
757+
# self.__log.debug("Retrieving children for parent with ID [%s].",
758+
# parent_id)
763759

764760
child_ids = drive_proxy('get_children_under_parent_id',
765761
parent_id=parent_id)
766762

767-
self.__log.debug("(%d) children found under parent with ID [%s].",
768-
len(child_ids), parent_id)
763+
# self.__log.debug("(%d) children found under parent with ID [%s].",
764+
# len(child_ids), parent_id)
769765

770766
for child_id in child_ids:
771767
if child_id == requested_entry_id:
@@ -817,15 +813,15 @@ def __do_update_for_missing_entry(self, requested_entry_id):
817813
for entry_id, entry in retrieved.iteritems():
818814
path_relations.register_entry(entry)
819815

820-
self.__log.debug("(%d) entries were loaded.", len(retrieved))
816+
# self.__log.debug("(%d) entries were loaded.", len(retrieved))
821817

822818
return retrieved
823819

824820
def fault_handler(self, resource_name, requested_entry_id):
825821
"""A requested entry wasn't stored."""
826822

827-
self.__log.debug("EntryCache has faulted on entry with ID [%s].",
828-
requested_entry_id)
823+
# self.__log.debug("EntryCache has faulted on entry with ID [%s].",
824+
# requested_entry_id)
829825

830826
retrieved = self.__do_update_for_missing_entry(requested_entry_id)
831827

gdrivefs/gdfs/fsutility.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,8 @@ def split_path(filepath_original, pathresolver_cb):
130130
(filepath_original))
131131
raise
132132

133-
log.debug("File-path [%s] split into filepath [%s] and mime_type "
134-
"[%s]." % (filepath_original, filepath, mime_type))
133+
# log.debug("File-path [%s] split into filepath [%s] and mime_type "
134+
# "[%s]." % (filepath_original, filepath, mime_type))
135135

136136
# Split the file-path into a path and a filename.
137137

@@ -141,22 +141,23 @@ def split_path(filepath_original, pathresolver_cb):
141141

142142
try:
143143
path_resolution = pathresolver_cb(path)
144+
# TODO(dustin): We need to specify the exception for when a file doesn't exist.
144145
except:
145146
log.exception("Exception while getting entry from path [%s]." % (path))
146147
raise GdNotFoundError()
147148

148149
if not path_resolution:
149-
log.debug("Path [%s] does not exist for split." % (path))
150+
# log.debug("Path [%s] does not exist for split." % (path))
150151
raise GdNotFoundError()
151152

152153
(parent_entry, parent_clause) = path_resolution
153154

154155
is_hidden = (filename[0] == '.') if filename else False
155156

156-
log.debug("File-path [%s] split into parent with ID [%s], path [%s], "
157-
"unverified filename [%s], mime-type [%s], and is_hidden [%s]." %
158-
(filepath_original, parent_entry.id, path, filename,
159-
mime_type, is_hidden))
157+
# log.debug("File-path [%s] split into parent with ID [%s], path [%s], "
158+
# "unverified filename [%s], mime-type [%s], and is_hidden [%s]." %
159+
# (filepath_original, parent_entry.id, path, filename,
160+
# mime_type, is_hidden))
160161

161162
return (parent_clause, path, filename, mime_type, is_hidden)
162163

gdrivefs/gdfs/opened_file.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -467,19 +467,8 @@ def flush(self):
467467

468468
self.__log.debug("Retrieving entry for write-flush.")
469469

470-
try:
471-
entry = self.__get_entry_or_raise()
472-
except:
473-
self.__log.exception("Could not get entry with ID [%s] for "
474-
"write-flush." % (self.__entry_id))
475-
raise
476-
477-
try:
478-
cache_fault = self.__load_base_from_remote()
479-
except:
480-
self.__log.exception("Could not load local cache for entry [%s]." %
481-
(entry))
482-
raise
470+
entry = self.__get_entry_or_raise()
471+
cache_fault = self.__load_base_from_remote()
483472

484473
with self.__class__.__update_lock:
485474
if self.__is_dirty is False:

gdrivefs/gdtool/chunked_download.py

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
from time import time
1+
import logging
2+
3+
from time import time, sleep
24
from random import random
35

46
from oauth2client import util
@@ -7,6 +9,8 @@
79

810
DEFAULT_CHUNK_SIZE = 1024 * 512
911

12+
_logger = logging.getLogger(__name__)
13+
1014

1115
class ChunkedDownload(object):
1216
""""Download an entry, chunk by chunk. This code is mostly identical to
@@ -56,36 +60,41 @@ def next_chunk(self, num_retries=0):
5660
apiclient.errors.HttpError if the response was not a 2xx.
5761
httplib2.HttpLib2Error if a transport error has occurred.
5862
"""
63+
5964
headers = {
6065
'range': 'bytes=%d-%d' % (
6166
self._progress, self._progress + self._chunksize)
6267
}
6368

6469
for retry_num in xrange(num_retries + 1):
65-
if retry_num > 0:
66-
self._sleep(self._rand() * 2**retry_num)
67-
logging.warning(
68-
'Retry #%d for media download: GET %s, following status: %d'
69-
% (retry_num, self._uri, resp.status))
70-
71-
resp, content = self._http.request(self._uri, headers=headers)
72-
if resp.status < 500:
73-
break
74-
75-
if resp.status in [200, 206]:
76-
if 'content-location' in resp and resp['content-location'] != self._uri:
70+
if retry_num > 0:
71+
self._sleep(self._rand() * 2**retry_num)
72+
logging.warning(
73+
'Retry #%d for media download: GET %s, following status: %d' %
74+
(retry_num, self._uri, resp.status))
75+
76+
resp, content = self._http.request(self._uri, headers=headers)
77+
if resp.status < 500:
78+
break
79+
80+
if resp.status not in (200, 206):
81+
raise HttpError(resp, content, uri=self._uri)
82+
83+
if 'content-location' in resp and resp['content-location'] != self._uri:
7784
self._uri = resp['content-location']
78-
self._progress += len(content)
79-
self._fd.write(content)
8085

81-
if 'content-range' in resp:
86+
self._progress += len(content)
87+
self._fd.write(content)
88+
89+
if 'content-length' in resp:
90+
self._total_size = int(resp['content-length'])
91+
elif 'content-range' in resp:
8292
content_range = resp['content-range']
8393
length = content_range.rsplit('/', 1)[1]
8494
self._total_size = int(length)
8595

86-
if self._progress == self._total_size:
96+
if self._progress == self._total_size:
8797
self._done = True
88-
return MediaDownloadProgress(self._progress, self._total_size), self._done
89-
else:
90-
raise HttpError(resp, content, uri=self._uri)
98+
99+
return MediaDownloadProgress(self._progress, self._total_size), self._done
91100

gdrivefs/gdtool/drive.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ def download_to_local(self, output_file_path, normalized_entry, mime_type,
322322
"""
323323

324324
self.__log.debug("Downloading entry with ID [%s] and mime-type [%s].",
325-
(normalized_entry.id, mime_type))
325+
normalized_entry.id, mime_type)
326326

327327
if mime_type != normalized_entry.mime_type and \
328328
mime_type not in normalized_entry.download_links:
@@ -377,8 +377,8 @@ def download_to_local(self, output_file_path, normalized_entry, mime_type,
377377

378378
url = normalized_entry.download_links[mime_type]
379379

380-
self.__log.debug("Downloading file from [%s]." % (url))
381-
380+
# self.__log.debug("Downloading file from [%s]." % (url))
381+
#
382382
# try:
383383
# data_tuple = authed_http.request(url)
384384
# except:
@@ -657,7 +657,12 @@ def proxied_method(auto_refresh=True, **kwargs):
657657

658658
time.sleep((2 ** n) + random.randint(0, 1000) / 1000)
659659
except HttpError as e:
660-
error = json.loads(e.content)
660+
try:
661+
error = json.loads(e.content)
662+
except ValueError:
663+
_logger.error("Non-JSON error while doing chunked "
664+
"download: %s", e.content)
665+
661666
if error.get('code') == 403 and \
662667
error.get('errors')[0].get('reason') \
663668
in ['rateLimitExceeded', 'userRateLimitExceeded']:

gdrivefs/gdtool/normal_entry.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ def xattr_data(self):
201201

202202
attrs = {}
203203
for a_type, a_dict in data_dict.iteritems():
204-
self.__log.debug("Setting [%s]." % (a_type))
204+
# self.__log.debug("Setting [%s]." % (a_type))
205205
for key, value in a_dict.iteritems():
206206
fqkey = ('user.%s.%s' % (a_type, key))
207207
attrs[fqkey] = self.__convert(value)

gdrivefs/log_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
default_logger = getLogger()
99
default_logger.setLevel(logging.WARNING)
10+
#default_logger.setLevel(logging.DEBUG)
1011

1112
# Log to syslog.
1213

0 commit comments

Comments
 (0)