diff --git a/README.md b/README.md
index d459676..e30816a 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,44 @@
 storage-file-tranfer-json-python
 ================================
-Uploads and downloads files between Google Cloud Storage and the local filesystem using the Google Python Client Library.
\ No newline at end of file
+Uploads and downloads files between Google Cloud Storage and the local
+filesystem using the Google APIs Python Client Library.
+
+api: storage
+keywords: cmdline, media, oauth2
+
+chunked_transfer.py
+SETUP:
+  1. Download chunked_transfer.py and client_secrets.json to the same directory.
+  2. As of August 2012, the Google APIs interface to Google Cloud Storage
+     (a.k.a. the GCS JSON API) is in Limited Preview, so users must request
+     access from the Services tab of the API Console.
+  3. Visit the Access tab of the API Console to create a client ID and secret
+     for an installed application.
+  4. Add your client ID and secret to your client_secrets.json file.
+  5. The first time the script runs, it will open a browser window asking for
+     permission to access Google Cloud Storage on your behalf. After you've
+     granted permission, the script creates a credentials.json file, which
+     stores the access and refresh tokens acquired from the OAuth flow.
+
+USAGE:
+  This script uploads and downloads files in chunks between Google Cloud
+  Storage and your local filesystem. It accepts two arguments: a file path and
+  a Cloud Storage resource name in "gs://<bucket>/<object>" format. The order
+  of these arguments dictates whether to upload or download (source first,
+  destination second).
+
+  For example, this command uploads a local file to bucket 'foo' with object
+  name 'bar':
+
+  $ python chunked_transfer.py Desktop/file_1 gs://foo/bar
+
+  and this command downloads that object back to a file on the desktop:
+
+  $ python chunked_transfer.py gs://foo/bar Desktop/file_2
+
+  When an upload concludes, the script prints the new object's JSON
+  representation.
+
diff --git a/chunked_transfer.py b/chunked_transfer.py
new file mode 100644
index 0000000..a370d80
--- /dev/null
+++ b/chunked_transfer.py
@@ -0,0 +1,216 @@
+# Copyright 2012 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Uploads or downloads files between Google Cloud Storage and the filesystem.
+
+The file is transferred in CHUNKSIZE pieces, and the process can resume in case
+of some failures.
+
+Usage examples:
+  $ python chunked_transfer.py gs://bucket/object ~/Desktop/filename
+  $ python chunked_transfer.py ~/Desktop/filename gs://bucket/object
+"""
+
+import httplib2
+import os
+import random
+import sys
+import time
+
+from apiclient.discovery import build as discovery_build
+from apiclient.errors import HttpError
+from apiclient.http import MediaFileUpload
+from apiclient.http import MediaIoBaseDownload
+from json import dumps as json_dumps
+from oauth2client.client import flow_from_clientsecrets
+from oauth2client.file import Storage as CredentialStorage
+from oauth2client.tools import run as run_oauth2
+
+
+# CLIENT_SECRETS_FILE, name of a file containing the OAuth 2.0 information for
+# this application, including client_id and client_secret. You can acquire an
+# ID/secret pair from the API Access tab on the Google APIs Console.
+#
+# For more information about using OAuth 2.0 to access Google APIs, see the
+# OAuth 2.0 documentation on the Google Developers site.
+CLIENT_SECRETS_FILE = 'client_secrets.json'
+
+# File where we will store authentication credentials after acquiring them.
+CREDENTIALS_FILE = 'credentials.json'
+
+# Message describing how to use the script.
+USAGE = """
+Usage examples:
+  $ python chunked_transfer.py gs://bucket/object ~/Desktop/filename
+  $ python chunked_transfer.py ~/Desktop/filename gs://bucket/object
+
+"""
+
+RW_SCOPE = 'https://www.googleapis.com/auth/devstorage.read_write'
+RO_SCOPE = 'https://www.googleapis.com/auth/devstorage.read_only'
+
+# Helpful message to display if the CLIENT_SECRETS_FILE is missing.
+MISSING_CLIENT_SECRETS_MESSAGE = """
+WARNING: Please configure OAuth 2.0
+
+To make this sample run you will need to populate the client_secrets.json file
+found at:
+
+   %s
+
+with information from the APIs Console.
+
+""" % os.path.abspath(os.path.join(os.path.dirname(__file__),
+                                   CLIENT_SECRETS_FILE))
+
+# Transport and file IO errors that are worth retrying.
+RETRYABLE_ERRORS = (httplib2.HttpLib2Error, IOError)
+
+# HTTP status codes indicating a transient server error; these are retried.
+RETRYABLE_STATUS_CODES = (500, 502, 503, 504)
+
+# Number of consecutive progressless iterations allowed before giving up.
+NUM_RETRIES = 5
+
+# Number of bytes to send/receive in each request.
+CHUNKSIZE = 2 * 1024 * 1024
+
+# Mimetype to use if one can't be guessed from the file extension.
+DEFAULT_MIMETYPE = 'application/octet-stream'
+
+
+def get_authenticated_service(scope):
+  """Runs the OAuth 2.0 flow if needed and builds the Cloud Storage service."""
+  print 'Authenticating...'
+  flow = flow_from_clientsecrets(CLIENT_SECRETS_FILE, scope=scope,
+                                 message=MISSING_CLIENT_SECRETS_MESSAGE)
+
+  credential_storage = CredentialStorage(CREDENTIALS_FILE)
+  credentials = credential_storage.get()
+  if credentials is None or credentials.invalid:
+    credentials = run_oauth2(flow, credential_storage)
+
+  print 'Constructing Google Cloud Storage service...'
+  http = credentials.authorize(httplib2.Http())
+  return discovery_build('storage', 'v1beta1', http=http)
+
+
+def handle_progressless_iter(error, progressless_iters, num_retries):
+  """Sleeps with jittered exponential backoff, or re-raises after num_retries."""
+  if progressless_iters > num_retries:
+    print 'Failed to make progress for too many consecutive iterations.'
+    raise error
+
+  sleeptime = random.random() * (2 ** progressless_iters)
+  print ('Caught exception (%s). Sleeping for %s seconds before retry #%d.'
+         % (str(error), sleeptime, progressless_iters))
+  time.sleep(sleeptime)
+
+
+def print_with_carriage_return(s):
+  sys.stdout.write('\r' + s)
+  sys.stdout.flush()
+
+
+def upload(argv):
+  """Uploads a local file to Cloud Storage in CHUNKSIZE pieces."""
+  filename = argv[1]
+  bucket_name, object_name = argv[2][5:].split('/', 1)
+
+  service = get_authenticated_service(RW_SCOPE)
+
+  print 'Building upload request...'
+  media = MediaFileUpload(filename, chunksize=CHUNKSIZE, resumable=True)
+  if not media.mimetype():
+    media = MediaFileUpload(filename, DEFAULT_MIMETYPE, resumable=True)
+  request = service.objects().insert(bucket=bucket_name, name=object_name,
+                                     media_body=media)
+
+  print 'Uploading file: %s to bucket: %s object: %s' % (filename, bucket_name,
+                                                         object_name)
+
+  progressless_iters = 0
+  response = None
+  while response is None:
+    try:
+      error = None
+      progress, response = request.next_chunk()
+      if progress:
+        print_with_carriage_return('Upload %d%%' % (100 * progress.progress()))
+    except HttpError, err:
+      error = err
+      if err.resp.status not in RETRYABLE_STATUS_CODES:
+        raise
+    except RETRYABLE_ERRORS, err:
+      error = err
+
+    if error is not None:
+      progressless_iters += 1
+      handle_progressless_iter(error, progressless_iters, NUM_RETRIES)
+    else:
+      progressless_iters = 0
+
+  print '\nUpload complete!'
+  print 'Uploaded Object:'
+  print json_dumps(response, indent=2)
+
+
+def download(argv):
+  """Downloads a Cloud Storage object to a local file in CHUNKSIZE pieces."""
+  bucket_name, object_name = argv[1][5:].split('/', 1)
+  filename = argv[2]
+
+  service = get_authenticated_service(RO_SCOPE)
+
+  print 'Building download request...'
+  f = open(filename, 'wb')
+  request = service.objects().get_media(bucket=bucket_name,
+                                        object=object_name)
+  media = MediaIoBaseDownload(f, request, chunksize=CHUNKSIZE)
+
+  print 'Downloading bucket: %s object: %s to file: %s' % (bucket_name,
+                                                           object_name,
+                                                           filename)
+
+  progressless_iters = 0
+  done = False
+  while not done:
+    try:
+      error = None
+      progress, done = media.next_chunk()
+      if progress:
+        print_with_carriage_return('Download %d%%.'
+                                   % int(progress.progress() * 100))
+    except HttpError, err:
+      error = err
+      if err.resp.status not in RETRYABLE_STATUS_CODES:
+        raise
+    except RETRYABLE_ERRORS, err:
+      error = err
+
+    if error is not None:
+      progressless_iters += 1
+      handle_progressless_iter(error, progressless_iters, NUM_RETRIES)
+    else:
+      progressless_iters = 0
+
+  print '\nDownload complete!'
+
+
+if __name__ == '__main__':
+  if len(sys.argv) < 3:
+    print 'Too few arguments.'
+    print USAGE
+    sys.exit(1)
+  if sys.argv[2].startswith('gs://'):
+    upload(sys.argv)
+  elif sys.argv[1].startswith('gs://'):
+    download(sys.argv)
+  else:
+    print USAGE
diff --git a/client_secrets.json b/client_secrets.json
new file mode 100644
index 0000000..a6d6520
--- /dev/null
+++ b/client_secrets.json
@@ -0,0 +1,9 @@
+{
+  "installed": {
+    "client_id": "INSERT CLIENT ID HERE",
+    "client_secret": "INSERT CLIENT SECRET HERE",
+    "redirect_uris": [],
+    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+    "token_uri": "https://accounts.google.com/o/oauth2/token"
+  }
+}
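
Note on the retry strategy: both upload() and download() treat an iteration that
raises a retryable error as "progressless" and back off with jittered
exponential sleeps via handle_progressless_iter(), giving up after NUM_RETRIES
consecutive failures. The standalone sketch below shows that same pattern in
isolation; the transfer_chunk callable is a hypothetical stand-in for
request.next_chunk() / media.next_chunk() and is not part of the patch above.

  import random
  import time

  NUM_RETRIES = 5  # consecutive progressless iterations allowed before giving up


  def transfer_with_backoff(transfer_chunk):
    # transfer_chunk() is assumed to return True once the whole transfer is
    # finished and to raise IOError on a transient, retryable failure.
    progressless_iters = 0
    done = False
    while not done:
      try:
        done = transfer_chunk()
        progressless_iters = 0  # progress was made; reset the counter
      except IOError, err:
        progressless_iters += 1
        if progressless_iters > NUM_RETRIES:
          raise  # too many consecutive failures; give up
        # Sleep a random fraction of 2, 4, 8, ... seconds, mirroring
        # handle_progressless_iter() in chunked_transfer.py.
        sleeptime = random.random() * (2 ** progressless_iters)
        print 'Caught %s, retrying in %.1f seconds.' % (err, sleeptime)
        time.sleep(sleeptime)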